library(readr)  
library(ggplot2)  
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(naniar)
library(tidyr)

file_path <- "/Users/eylulruyagullu/Desktop/match_data"
match_data <- read_csv(file_path)
## Rows: 63944 Columns: 106
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (5): halftime, name, current_state, final_score, result
## dbl  (94): fixture_id, minute, second, 1, 2, X, Accurate Crosses - away, Acc...
## lgl   (3): suspended, stopped, ticking
## dttm  (4): current_time, half_start_datetime, match_start_datetime, latest_b...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
match_data_2 <- match_data

head(match_data)
## # A tibble: 6 × 106
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19172016 1st-half 2024-08-09 18:11:36 2024-08-09 18:01:37
## 2   19172016 1st-half 2024-08-09 18:27:30 2024-08-09 18:01:37
## 3   19172016 1st-half 2024-08-09 18:28:25 2024-08-09 18:01:37
## 4   19172016 1st-half 2024-08-09 18:29:32 2024-08-09 18:01:37
## 5   19172016 1st-half 2024-08-09 18:30:37 2024-08-09 18:01:37
## 6   19172016 1st-half 2024-08-09 18:31:18 2024-08-09 18:01:37
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
nrow(match_data)
## [1] 63944
ncol(match_data)
## [1] 106
for (col in colnames(match_data)) {
    print(col)
}
## [1] "fixture_id"
## [1] "halftime"
## [1] "current_time"
## [1] "half_start_datetime"
## [1] "match_start_datetime"
## [1] "minute"
## [1] "second"
## [1] "latest_bookmaker_update"
## [1] "suspended"
## [1] "stopped"
## [1] "1"
## [1] "2"
## [1] "X"
## [1] "name"
## [1] "ticking"
## [1] "Accurate Crosses - away"
## [1] "Accurate Crosses - home"
## [1] "Assists - away"
## [1] "Assists - home"
## [1] "Attacks - away"
## [1] "Attacks - home"
## [1] "Ball Possession % - away"
## [1] "Ball Possession % - home"
## [1] "Ball Safe - away"
## [1] "Ball Safe - home"
## [1] "Challenges - away"
## [1] "Challenges - home"
## [1] "Corners - away"
## [1] "Corners - home"
## [1] "Counter Attacks - away"
## [1] "Counter Attacks - home"
## [1] "Dangerous Attacks - away"
## [1] "Dangerous Attacks - home"
## [1] "Dribble Attempts - away"
## [1] "Dribble Attempts - home"
## [1] "Fouls - away"
## [1] "Fouls - home"
## [1] "Free Kicks - away"
## [1] "Free Kicks - home"
## [1] "Goal Attempts - away"
## [1] "Goal Attempts - home"
## [1] "Goal Kicks - away"
## [1] "Goal Kicks - home"
## [1] "Goals - away"
## [1] "Goals - home"
## [1] "Headers - away"
## [1] "Headers - home"
## [1] "Hit Woodwork - away"
## [1] "Hit Woodwork - home"
## [1] "Injuries - away"
## [1] "Injuries - home"
## [1] "Interceptions - away"
## [1] "Interceptions - home"
## [1] "Key Passes - away"
## [1] "Key Passes - home"
## [1] "Long Passes - away"
## [1] "Long Passes - home"
## [1] "Offsides - away"
## [1] "Offsides - home"
## [1] "Passes - away"
## [1] "Passes - home"
## [1] "Penalties - away"
## [1] "Penalties - home"
## [1] "Redcards - away"
## [1] "Redcards - home"
## [1] "Saves - away"
## [1] "Saves - home"
## [1] "Score Change - away"
## [1] "Score Change - home"
## [1] "Shots Blocked - away"
## [1] "Shots Blocked - home"
## [1] "Shots Insidebox - away"
## [1] "Shots Insidebox - home"
## [1] "Shots Off Target - away"
## [1] "Shots Off Target - home"
## [1] "Shots On Target - away"
## [1] "Shots On Target - home"
## [1] "Shots Outsidebox - away"
## [1] "Shots Outsidebox - home"
## [1] "Shots Total - away"
## [1] "Shots Total - home"
## [1] "Substitutions - away"
## [1] "Substitutions - home"
## [1] "Successful Dribbles - away"
## [1] "Successful Dribbles - home"
## [1] "Successful Headers - away"
## [1] "Successful Headers - home"
## [1] "Successful Interceptions - away"
## [1] "Successful Interceptions - home"
## [1] "Successful Passes - away"
## [1] "Successful Passes - home"
## [1] "Successful Passes Percentage - away"
## [1] "Successful Passes Percentage - home"
## [1] "Tackles - away"
## [1] "Tackles - home"
## [1] "Throwins - away"
## [1] "Throwins - home"
## [1] "Total Crosses - away"
## [1] "Total Crosses - home"
## [1] "Yellowcards - away"
## [1] "Yellowcards - home"
## [1] "Yellowred Cards - away"
## [1] "Yellowred Cards - home"
## [1] "current_state"
## [1] "final_score"
## [1] "result"

In order to analyze the number of minutes for each match, grouping was done in the code based on fixture_id. In the grouped data, the total minutes recorded for each match were calculated and the results were summarized under the minute_count variable. This summary table was then sorted by the number of minutes and arranged for a detailed analysis. In this way, the data for each match was made easier to compare based on minute records.

match_data_summary <- match_data %>%
  group_by(fixture_id) %>%
  summarise(minute_count = n()) %>%
  arrange(minute_count)

print(match_data_summary)
## # A tibble: 648 × 2
##    fixture_id minute_count
##         <dbl>        <int>
##  1   19172016           39
##  2   19172012           47
##  3   19172089           52
##  4   19172085           54
##  5   19172014           70
##  6   19155091           76
##  7   19172043           81
##  8   19139670           84
##  9   19172044           85
## 10   19155095           86
## # ℹ 638 more rows

The main purpose of this code is to clean the data by removing duplicate rows from the match_data dataset. First, it sorts the data based on the fixture_id and current_time columns, then removes duplicates that contain the same values. Finally, it calculates the number of deleted rows.

match_data <- match_data[order(match_data$fixture_id, match_data$current_time), ]
rownames(match_data) <- NULL

before_removal <- nrow(match_data)

match_data <- match_data[!duplicated(match_data[, c("fixture_id", "current_time")]), ]

after_removal <- nrow(match_data)

cat("Amount of deleted rows:", before_removal - after_removal, "\n")
## Amount of deleted rows: 0
print(head(match_data))
## # A tibble: 6 × 106
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
match_data
## # A tibble: 63,944 × 106
##    fixture_id halftime current_time        half_start_datetime
##         <dbl> <chr>    <dttm>              <dttm>             
##  1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
##  2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
##  3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
##  4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
##  5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
##  6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
##  7   19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
##  8   19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
##  9   19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10   19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 63,934 more rows
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
nrow(match_data)
## [1] 63944
table(match_data$suspended)
## 
## FALSE  TRUE 
## 56127  7817
table(match_data$stopped)
## 
## FALSE  TRUE 
## 59829  4115

The main purpose of this code is to clean the missing and erroneous data in the match_data dataset. First, the rows with suspended and stopped values as False are filtered. Then, the values in the critical columns are converted to appropriate numeric and date formats, and the erroneous data are marked as NA. Finally, the missing (NA) values in the critical columns are checked and these rows are removed from the dataset. After cleaning, the number of rows in the dataset is reported to the user.

cat("Amount of rows before data cleaning:", nrow(match_data), "\n")
## Amount of rows before data cleaning: 63944
match_data <- subset(match_data, suspended == FALSE & stopped == FALSE)
cat("Amount of rows after data cleaning 1:", nrow(match_data), "\n")
## Amount of rows after data cleaning 1: 56127
match_data$`1` <- suppressWarnings(as.numeric(match_data$`1`))
match_data$X <- suppressWarnings(as.numeric(match_data$X))
match_data$`2` <- suppressWarnings(as.numeric(match_data$`2`))

match_data$current_time <- as.POSIXct(match_data$current_time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
match_data$half_start_datetime <- as.POSIXct(match_data$half_start_datetime, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
match_data$latest_bookmaker_update <- as.POSIXct(match_data$latest_bookmaker_update, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")

critical_columns <- c("current_time", "half_start_datetime", "1", "X", "2")
match_data <- match_data[complete.cases(match_data[, critical_columns]), ]

cat("Amount of rows after data cleaning 2:", nrow(match_data), "\n")
## Amount of rows after data cleaning 2: 56127
print(head(match_data))
## # A tibble: 6 × 106
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 102 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …

TASK 1 TASK 1.1 and TASK 1.2

match_data <- match_data %>%
  mutate(
    Total_odds = (1 / `1`) + (1 / `2`) + (1 / `X`),
    P_home = (1 / `1`) / Total_odds,
    P_away = (1 / `2`) / Total_odds,
    P_draw = (1 / `X`) / Total_odds,
    P_home_minus_P_away = P_home - P_away
  )

match_data <- match_data %>%
  mutate(
total_prob = P_home + P_draw + P_away,  
      P_home_norm = (P_home / total_prob),
      P_draw_norm = (P_draw / total_prob),
      P_away_norm = (P_away / total_prob)
    )

first_half <- match_data %>% filter(halftime == "1st-half")
second_half <- match_data %>% filter(halftime == "2nd-half")
head(first_half)
## # A tibble: 6 × 115
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
head(second_half)
## # A tibble: 6 × 115
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 2nd-half 2024-08-16 20:04:18 2024-08-16 20:02:38
## 2   19134453 2nd-half 2024-08-16 20:05:18 2024-08-16 20:02:38
## 3   19134453 2nd-half 2024-08-16 20:06:18 2024-08-16 20:02:38
## 4   19134453 2nd-half 2024-08-16 20:07:19 2024-08-16 20:02:38
## 5   19134453 2nd-half 2024-08-16 20:08:18 2024-08-16 20:02:38
## 6   19134453 2nd-half 2024-08-16 20:09:18 2024-08-16 20:02:38
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
check_normalization_count <- function(data) {
  normalized_total <- data$P_home_norm + data$P_draw_norm + data$P_away_norm
  count_greater_than_one <- sum(normalized_total > 1.001)
  return(count_greater_than_one)
}

first_half_issues_count <- check_normalization_count(first_half)
second_half_issues_count <- check_normalization_count(second_half)

TASK 1.3 - First Half

first_half <- first_half %>%
  filter(!is.na(`1`) & !is.na(`2`))  


ggplot(first_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title = "First Half - P(Home Win) - P(Away Win) vs P(Draw)",
       x = "P(Home Win) - P(Away Win)",
       y = "P(Draw)") +
  theme_minimal()

bins <- seq(-1, 1, by = 0.2)  

binned_data <- first_half %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),                        
    draws = sum(result == "X"),               
    empirical_P_tie = draws / total_games,    
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)  
  ) %>%
  filter(!is.na(bin))  

ggplot(binned_data, aes(x = bin)) +
  geom_bar(aes(y = empirical_P_tie), stat = "identity", fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  labs(title = "First Half - Empirical vs Bookmaker P(Draw) by Bins",
       x = "P(Home Win) - P(Away Win) Bins",
       y = "Probability of Draw") +
  theme_minimal()

num_bins <- 20

coeffs_1st_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = first_half)
first_half$predicted_draw <- predict(coeffs_1st_half, newdata = first_half)

breaks <- seq(min(first_half$P_home_minus_P_away), max(first_half$P_home_minus_P_away), length.out = num_bins + 1)
first_half$P_home_minus_P_away_bin <- cut(first_half$P_home_minus_P_away, breaks = breaks, include.lowest = TRUE)

actual_probabilities_first <- first_half %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE))

bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

actual_probabilities_first <- actual_probabilities_first %>%
  mutate(bin_center = bin_centers)

coeffs_1st_half_actual <- lm(probability_of_draw ~ poly(bin_centers, 2), data = actual_probabilities_first)

predicted_probabilities <- predict(coeffs_1st_half_actual, newdata = actual_probabilities_first)

ggplot(first_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +  # Bookmaker probabilities
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) +  # Bookmaker Trend Line
  geom_point(data = actual_probabilities_first, aes(x = bin_centers, y = probability_of_draw), color = "green", size = 3) +  
  geom_line(data = actual_probabilities_first, aes(x = bin_centers, y = predicted_probabilities), color = "orange", size = 1) +  
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (1st Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

TASK 1.3 - Second Half

second_half <- second_half %>%
  filter(!is.na(`1`) & !is.na(`2`))  

ggplot(second_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title = "Second Half - P(Home Win) - P(Away Win) vs P(Draw)",
       x = "P(Home Win) - P(Away Win)",
       y = "P(Draw)") +
  theme_minimal()

bins <- seq(-1, 1, by = 0.2)  

binned_data <- second_half %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),                        
    draws = sum(result == "X"),               
    empirical_P_tie = draws / total_games,    
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)  
  ) %>%
  filter(!is.na(bin))  

ggplot(binned_data, aes(x = bin)) +
  geom_bar(aes(y = empirical_P_tie), stat = "identity", fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  labs(title = "Second Half - Empirical vs Bookmaker P(Draw) by Bins",
       x = "P(Home Win) - P(Away Win) Bins",
       y = "Probability of Draw") +
  theme_minimal()

The blue dots on the graph are below or above the red bars, indicating that the predicted Draw probabilities (the values ​​provided by the bookmaker) are lower or higher than the observed Draw probabilities (empirical P(tie)). If the blue dots are above the red bars, the Draw probability given by the bookmaker tends to be lower than the actual data. In this case, betting on Draw can be profitable in the long run, because the bookmaker gives a lower probability, while the actual probability is higher. However, there are also cases where the blue dots are below the red bars. This indicates that the Draw probability given by the bookmaker is higher than the actual probability, and in such a case, betting on Draw may not be profitable in the long run. Such analyses provide an important tool for identifying opportunities in betting strategies. The lower number of blue dots at the top in the second half suggests that the Draw odds were more consistent and predictable. This could mean that the bookmaker’s predictions were more accurate in the second half and that the outcome of the match was less uncertain than in the first half. This consistency could have caused the blue dots to be less likely to be at the top of the red columns.

num_bins <- 20

coeffs_2nd_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = second_half)
second_half$predicted_draw <- predict(coeffs_2nd_half, newdata = second_half)

breaks <- seq(min(second_half$P_home_minus_P_away), max(second_half$P_home_minus_P_away), length.out = num_bins + 1)
second_half$P_home_minus_P_away_bin <- cut(second_half$P_home_minus_P_away, breaks = breaks, include.lowest = TRUE)

actual_probabilities_second <- second_half %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE))

bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

actual_probabilities_second <- actual_probabilities_second %>%
  mutate(bin_center = bin_centers)

coeffs_2nd_half_actual <- lm(probability_of_draw ~ poly(bin_centers, 2), data = actual_probabilities_second)

predicted_probabilities <- predict(coeffs_2nd_half_actual, newdata = actual_probabilities_second)

ggplot(second_half, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +  # Bookmaker probabilities
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) +  # Bookmaker Trend Line
  geom_point(data = actual_probabilities_second, aes(x = bin_centers, y = probability_of_draw), color = "green", size = 3) +  
  geom_line(data = actual_probabilities_second, aes(x = bin_centers, y = predicted_probabilities), color = "orange", size = 1) +  
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (2nd Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()

TASK 2 The code filters out matches that scored after the 90th minute and received a red card before the 10th minute. First, it looks at the differences in the number of goals before and after the 90th minute and identifies matches that show these differences. It then also excludes matches that resulted in early red cards and removes these matches from the match_data_special data frame. As a result, matches that meet certain criteria are removed from the data set and the number of excluded matches is calculated.

#After 90th minute goals elimination
events_after_90 <- match_data %>%
  filter(minute > 90)

goals_before_90 <- match_data %>%
  filter(minute <= 90) %>%
  select(fixture_id, `Goals - home`, `Goals - away`) %>%
  distinct()

goals_after_90 <- events_after_90 %>%
  select(fixture_id, `Goals - home`, `Goals - away`) %>%
  distinct()

matches_with_diff_goals <- goals_after_90 %>%
  inner_join(goals_before_90, by = "fixture_id", suffix = c("_after_90", "_before_90")) %>%
  filter(`Goals - away_after_90` != `Goals - away_before_90` | 
         `Goals - home_after_90` != `Goals - home_before_90`) %>%
  pull(fixture_id) %>%
  unique()

#Early red card matches elimination
early_red_card_matches <- match_data %>%
  filter(minute < 10, (`Redcards - home` > 0 | `Redcards - away` > 0)) %>%
  pull(fixture_id) %>%
  unique()

exclude_matches <- union(matches_with_diff_goals, early_red_card_matches)

match_data_special <- match_data %>%
  filter(!fixture_id %in% exclude_matches)

removed_matches <- nrow(match_data) - nrow(match_data_special)

cat("Amount of deleted rows: ", removed_matches, "\n")
## Amount of deleted rows:  3280
match_data_special
## # A tibble: 52,847 × 115
##    fixture_id halftime current_time        half_start_datetime
##         <dbl> <chr>    <dttm>              <dttm>             
##  1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
##  2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
##  3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
##  4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
##  5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
##  6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
##  7   19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
##  8   19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
##  9   19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10   19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 52,837 more rows
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
first_half_special <- match_data_special %>% filter(halftime == "1st-half")
second_half_special <- match_data_special %>% filter(halftime == "2nd-half")
head(first_half_special)
## # A tibble: 6 × 115
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
## 2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
## 3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
## 4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
## 5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
## 6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …
head(second_half_special)
## # A tibble: 6 × 115
##   fixture_id halftime current_time        half_start_datetime
##        <dbl> <chr>    <dttm>              <dttm>             
## 1   19134453 2nd-half 2024-08-16 20:04:18 2024-08-16 20:02:38
## 2   19134453 2nd-half 2024-08-16 20:05:18 2024-08-16 20:02:38
## 3   19134453 2nd-half 2024-08-16 20:06:18 2024-08-16 20:02:38
## 4   19134453 2nd-half 2024-08-16 20:07:19 2024-08-16 20:02:38
## 5   19134453 2nd-half 2024-08-16 20:08:18 2024-08-16 20:02:38
## 6   19134453 2nd-half 2024-08-16 20:09:18 2024-08-16 20:02:38
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>,
## #   `Ball Possession % - home` <dbl>, `Ball Safe - away` <dbl>, …

After specialization, 1st half

ggplot(first_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title = "First Half - P(Home Win) - P(Away Win) vs P(Draw)",
       x = "P(Home Win) - P(Away Win)",
       y = "P(Draw)") +
  theme_minimal()

bins <- seq(-1, 1, by = 0.2)  

binned_data <- first_half_special %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),                        
    draws = sum(result == "X"),               
    empirical_P_tie = draws / total_games,    
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)  
  ) %>%
  filter(!is.na(bin))  

ggplot(binned_data, aes(x = bin)) +
  geom_bar(aes(y = empirical_P_tie), stat = "identity", fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  labs(title = "First Half - Empirical vs Bookmaker P(Draw) by Bins",
       x = "P(Home Win) - P(Away Win) Bins",
       y = "Probability of Draw") +
  theme_minimal()

num_bins <- 20

coeffs_1st_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = first_half_special)
first_half_special$predicted_draw <- predict(coeffs_1st_half, newdata = first_half_special)

breaks <- seq(min(first_half_special$P_home_minus_P_away), max(first_half_special$P_home_minus_P_away), length.out = num_bins + 1)
first_half_special$P_home_minus_P_away_bin <- cut(first_half_special$P_home_minus_P_away, breaks = breaks, include.lowest = TRUE)

actual_probabilities_first <- first_half_special %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE))

bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

actual_probabilities_first <- actual_probabilities_first %>%
  mutate(bin_center = bin_centers)

coeffs_1st_half_actual <- lm(probability_of_draw ~ poly(bin_centers, 2), data = actual_probabilities_first)

predicted_probabilities <- predict(coeffs_1st_half_actual, newdata = actual_probabilities_first)

ggplot(first_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +  # Bookmaker olasılıkları
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) +  # Bookmaker Trend Line
  geom_point(data = actual_probabilities_first, aes(x = bin_centers, y = probability_of_draw), color = "green", size = 3) +  
  geom_line(data = actual_probabilities_first, aes(x = bin_centers, y = predicted_probabilities), color = "orange", size = 1) +  
  labs(
    title = "P(Home Win) - P(Away Win) vs P(Draw) (1st Half) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()

After removing the matches fitting well to the cases (red card in the first 10 minutes of a game and one of the teams score a goal after 90th minute), we see that there is a better fit in the model.

After specialization, 2nd half

ggplot(second_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +
  labs(title = "Second Half - P(Home Win) - P(Away Win) vs P(Draw)",
       x = "P(Home Win) - P(Away Win)",
       y = "P(Draw)") +
  theme_minimal()

bins <- seq(-1, 1, by = 0.2)  

binned_data <- second_half_special %>%
  mutate(bin = cut(P_home_minus_P_away, breaks = bins, include.lowest = TRUE)) %>%
  group_by(bin) %>%
  summarise(
    total_games = n(),                        
    draws = sum(result == "X"),               
    empirical_P_tie = draws / total_games,    
    avg_bookmaker_P_tie = mean(P_draw, na.rm = TRUE)  
  ) %>%
  filter(!is.na(bin))  

ggplot(binned_data, aes(x = bin)) +
  geom_bar(aes(y = empirical_P_tie), stat = "identity", fill = "red", alpha = 0.6) +
  geom_point(aes(y = avg_bookmaker_P_tie), color = "blue", size = 3) +
  geom_line(aes(y = avg_bookmaker_P_tie, group = 1), color = "blue", linetype = "dashed") +
  labs(title = "Second Half - Empirical vs Bookmaker P(Draw) by Bins",
       x = "P(Home Win) - P(Away Win) Bins",
       y = "Probability of Draw") +
  theme_minimal()

num_bins <- 20

coeffs_2nd_half <- lm(P_draw ~ poly(P_home_minus_P_away, 2), data = second_half_special)
second_half_special$predicted_draw <- predict(coeffs_2nd_half, newdata = second_half_special)

breaks <- seq(min(second_half_special$P_home_minus_P_away), max(second_half_special$P_home_minus_P_away), length.out = num_bins + 1)
second_half_special$P_home_minus_P_away_bin <- cut(second_half_special$P_home_minus_P_away, breaks = breaks, include.lowest = TRUE)

actual_probabilities_second <- second_half_special %>%
  group_by(P_home_minus_P_away_bin) %>%
  summarise(probability_of_draw = mean(result == "X", na.rm = TRUE))

bin_centers <- breaks[-length(breaks)] + diff(breaks) / 2

actual_probabilities_second <- actual_probabilities_second %>%
  mutate(bin_center = bin_centers)

coeffs_2nd_half_actual <- lm(probability_of_draw ~ poly(bin_centers, 2), data = actual_probabilities_second)

predicted_probabilities <- predict(coeffs_2nd_half_actual, newdata = actual_probabilities_second)

ggplot(second_half_special, aes(x = P_home_minus_P_away, y = P_draw)) +
  geom_point(alpha = 0.5, color = "blue") +  # Bookmaker olasılıkları
  geom_line(aes(x = P_home_minus_P_away, y = predicted_draw), color = "red", size = 1) +  # Bookmaker Trend Line
  geom_point(data = actual_probabilities_second, aes(x = bin_centers, y = probability_of_draw), color = "green", size = 3) +  
  geom_line(data = actual_probabilities_second, aes(x = bin_centers, y = predicted_probabilities), color = "orange", size = 1) +  
  labs(
    title = "(2nd Half) | P(Home Win) - P(Away Win) vs P(Draw) with Actual Outcome Trend",
    x = "P(Home Win) - P(Away Win)",
    y = "P(Draw)"
  ) +
  theme_minimal()

After removing the matches fitting well to the cases (red card in the first 10 minutes of a game and one of the teams score a goal after 90th minute), we see that there is a better fit in the model.

TASK 3 Before starting the analysis, I looked at the correlations between the variables and those with a correlation of over 85% to get a general overview.We see a 1 or -1 correlation between some variables (such as Ball Possession % - home and Ball Possession % - away). Since such variables are directly related to each other, I decided not to give importance to this high difficulty in the analysis.

library(dplyr)

match_data_special <- match_data_special %>%
  group_by(fixture_id) %>%
  arrange(fixture_id) %>%
  tidyr::fill(everything(), .direction = "down") %>%
  ungroup()

cat("NA count:\n")
## NA count:
print(colSums(is.na(match_data_special)))
##                          fixture_id                            halftime 
##                                   0                                   0 
##                        current_time                 half_start_datetime 
##                                   0                                   0 
##                match_start_datetime                              minute 
##                                   0                                   0 
##                              second             latest_bookmaker_update 
##                                   0                                   0 
##                           suspended                             stopped 
##                                   0                                   0 
##                                   1                                   2 
##                                   0                                   0 
##                                   X                                name 
##                                   0                                   0 
##                             ticking             Accurate Crosses - away 
##                                   0                                2735 
##             Accurate Crosses - home                      Assists - away 
##                                2716                               24695 
##                      Assists - home                      Attacks - away 
##                               24693                                  37 
##                      Attacks - home            Ball Possession % - away 
##                                  32                                  22 
##            Ball Possession % - home                    Ball Safe - away 
##                                  21                               20071 
##                    Ball Safe - home                   Challenges - away 
##                               20067                                4197 
##                   Challenges - home                      Corners - away 
##                                4192                                  69 
##                      Corners - home              Counter Attacks - away 
##                                  62                               34253 
##              Counter Attacks - home            Dangerous Attacks - away 
##                               34251                                  50 
##            Dangerous Attacks - home             Dribble Attempts - away 
##                                  45                                7031 
##             Dribble Attempts - home                        Fouls - away 
##                                7007                                1459 
##                        Fouls - home                   Free Kicks - away 
##                                1448                               48159 
##                   Free Kicks - home                Goal Attempts - away 
##                               48159                               23691 
##                Goal Attempts - home                   Goal Kicks - away 
##                               23684                                4138 
##                   Goal Kicks - home                        Goals - away 
##                                4131                                  37 
##                        Goals - home                      Headers - away 
##                                  36                                9281 
##                      Headers - home                 Hit Woodwork - away 
##                                9275                                 185 
##                 Hit Woodwork - home                     Injuries - away 
##                                 165                               39045 
##                     Injuries - home                Interceptions - away 
##                               39044                                3811 
##                Interceptions - home                   Key Passes - away 
##                                3791                                4883 
##                   Key Passes - home                  Long Passes - away 
##                                4880                                4867 
##                  Long Passes - home                     Offsides - away 
##                                4857                               15828 
##                     Offsides - home                       Passes - away 
##                               15823                                 275 
##                       Passes - home                    Penalties - away 
##                                 261                                  73 
##                    Penalties - home                     Redcards - away 
##                                  66                                  59 
##                     Redcards - home                        Saves - away 
##                                  51                                9445 
##                        Saves - home                 Score Change - away 
##                                9435                                   0 
##                 Score Change - home                Shots Blocked - away 
##                                   0                                 212 
##                Shots Blocked - home              Shots Insidebox - away 
##                                 201                                 243 
##              Shots Insidebox - home             Shots Off Target - away 
##                                 225                                  33 
##             Shots Off Target - home              Shots On Target - away 
##                                  28                                  32 
##              Shots On Target - home             Shots Outsidebox - away 
##                                  28                                 247 
##             Shots Outsidebox - home                  Shots Total - away 
##                                 226                                   0 
##                  Shots Total - home                Substitutions - away 
##                                   0                                  78 
##                Substitutions - home          Successful Dribbles - away 
##                                  69                                4790 
##          Successful Dribbles - home           Successful Headers - away 
##                                4775                                9297 
##           Successful Headers - home     Successful Interceptions - away 
##                                9287                                2399 
##     Successful Interceptions - home            Successful Passes - away 
##                                2394                                 362 
##            Successful Passes - home Successful Passes Percentage - away 
##                                 365                                 143 
## Successful Passes Percentage - home                      Tackles - away 
##                                 140                                1761 
##                      Tackles - home                     Throwins - away 
##                                1750                                1017 
##                     Throwins - home                Total Crosses - away 
##                                1010                                2009 
##                Total Crosses - home                  Yellowcards - away 
##                                1998                                  74 
##                  Yellowcards - home              Yellowred Cards - away 
##                                  64                               19926 
##              Yellowred Cards - home                       current_state 
##                               19919                                  37 
##                         final_score                              result 
##                                   0                                   0 
##                          Total_odds                              P_home 
##                                   0                                   0 
##                              P_away                              P_draw 
##                                   0                                   0 
##                 P_home_minus_P_away                          total_prob 
##                                   0                                   0 
##                         P_home_norm                         P_draw_norm 
##                                   0                                   0 
##                         P_away_norm 
##                                   0
match_data_special <- match_data_special %>%
  mutate(across(where(is.numeric), ~replace(., is.na(.), 0)),  # Sayısal sütunlar için 0 ile doldurma
         across(where(is.character), ~replace(., is.na(.), "0")))  # Karakter sütunlar için "0" ile doldurma

cat("NA count (after filling w/ 0):\n")
## NA count (after filling w/ 0):
print(colSums(is.na(match_data_special)))
##                          fixture_id                            halftime 
##                                   0                                   0 
##                        current_time                 half_start_datetime 
##                                   0                                   0 
##                match_start_datetime                              minute 
##                                   0                                   0 
##                              second             latest_bookmaker_update 
##                                   0                                   0 
##                           suspended                             stopped 
##                                   0                                   0 
##                                   1                                   2 
##                                   0                                   0 
##                                   X                                name 
##                                   0                                   0 
##                             ticking             Accurate Crosses - away 
##                                   0                                   0 
##             Accurate Crosses - home                      Assists - away 
##                                   0                                   0 
##                      Assists - home                      Attacks - away 
##                                   0                                   0 
##                      Attacks - home            Ball Possession % - away 
##                                   0                                   0 
##            Ball Possession % - home                    Ball Safe - away 
##                                   0                                   0 
##                    Ball Safe - home                   Challenges - away 
##                                   0                                   0 
##                   Challenges - home                      Corners - away 
##                                   0                                   0 
##                      Corners - home              Counter Attacks - away 
##                                   0                                   0 
##              Counter Attacks - home            Dangerous Attacks - away 
##                                   0                                   0 
##            Dangerous Attacks - home             Dribble Attempts - away 
##                                   0                                   0 
##             Dribble Attempts - home                        Fouls - away 
##                                   0                                   0 
##                        Fouls - home                   Free Kicks - away 
##                                   0                                   0 
##                   Free Kicks - home                Goal Attempts - away 
##                                   0                                   0 
##                Goal Attempts - home                   Goal Kicks - away 
##                                   0                                   0 
##                   Goal Kicks - home                        Goals - away 
##                                   0                                   0 
##                        Goals - home                      Headers - away 
##                                   0                                   0 
##                      Headers - home                 Hit Woodwork - away 
##                                   0                                   0 
##                 Hit Woodwork - home                     Injuries - away 
##                                   0                                   0 
##                     Injuries - home                Interceptions - away 
##                                   0                                   0 
##                Interceptions - home                   Key Passes - away 
##                                   0                                   0 
##                   Key Passes - home                  Long Passes - away 
##                                   0                                   0 
##                  Long Passes - home                     Offsides - away 
##                                   0                                   0 
##                     Offsides - home                       Passes - away 
##                                   0                                   0 
##                       Passes - home                    Penalties - away 
##                                   0                                   0 
##                    Penalties - home                     Redcards - away 
##                                   0                                   0 
##                     Redcards - home                        Saves - away 
##                                   0                                   0 
##                        Saves - home                 Score Change - away 
##                                   0                                   0 
##                 Score Change - home                Shots Blocked - away 
##                                   0                                   0 
##                Shots Blocked - home              Shots Insidebox - away 
##                                   0                                   0 
##              Shots Insidebox - home             Shots Off Target - away 
##                                   0                                   0 
##             Shots Off Target - home              Shots On Target - away 
##                                   0                                   0 
##              Shots On Target - home             Shots Outsidebox - away 
##                                   0                                   0 
##             Shots Outsidebox - home                  Shots Total - away 
##                                   0                                   0 
##                  Shots Total - home                Substitutions - away 
##                                   0                                   0 
##                Substitutions - home          Successful Dribbles - away 
##                                   0                                   0 
##          Successful Dribbles - home           Successful Headers - away 
##                                   0                                   0 
##           Successful Headers - home     Successful Interceptions - away 
##                                   0                                   0 
##     Successful Interceptions - home            Successful Passes - away 
##                                   0                                   0 
##            Successful Passes - home Successful Passes Percentage - away 
##                                   0                                   0 
## Successful Passes Percentage - home                      Tackles - away 
##                                   0                                   0 
##                      Tackles - home                     Throwins - away 
##                                   0                                   0 
##                     Throwins - home                Total Crosses - away 
##                                   0                                   0 
##                Total Crosses - home                  Yellowcards - away 
##                                   0                                   0 
##                  Yellowcards - home              Yellowred Cards - away 
##                                   0                                   0 
##              Yellowred Cards - home                       current_state 
##                                   0                                   0 
##                         final_score                              result 
##                                   0                                   0 
##                          Total_odds                              P_home 
##                                   0                                   0 
##                              P_away                              P_draw 
##                                   0                                   0 
##                 P_home_minus_P_away                          total_prob 
##                                   0                                   0 
##                         P_home_norm                         P_draw_norm 
##                                   0                                   0 
##                         P_away_norm 
##                                   0
numeric_data <- first_half_special %>% select_if(is.numeric)

correlation_matrix <- cor(numeric_data, use = "complete.obs")
## Warning in cor(numeric_data, use = "complete.obs"): the standard deviation is
## zero
correlation_long <- as.data.frame(as.table(correlation_matrix))

correlation_long <- correlation_long[correlation_long$Var1 != correlation_long$Var2, ]

correlation_long <- correlation_long[!is.na(correlation_long$Freq), ]

correlation_long <- correlation_long[abs(correlation_long$Freq) > 0.85, ]

correlation_long
##                                      Var1                                Var2
## 5                                       2                          fixture_id
## 33                      Goal Kicks - away                          fixture_id
## 42                        Injuries - home                          fixture_id
## 54                       Penalties - home                          fixture_id
## 96                                 P_home                          fixture_id
## 101                           P_home_norm                          fixture_id
## 225                     Challenges - away                              second
## 251                  Interceptions - away                              second
## 409                                P_away                                   1
## 415                           P_away_norm                                   1
## 417                            fixture_id                                   2
## 422                                     X                                   2
## 458                       Injuries - home                                   2
## 470                      Penalties - home                                   2
## 525                                     2                                   X
## 562                       Injuries - home                                   X
## 574                      Penalties - home                                   X
## 785                          Saves - away             Accurate Crosses - home
## 792                Shots Insidebox - home             Accurate Crosses - home
## 796                Shots On Target - home             Accurate Crosses - home
## 819                    Yellowcards - away             Accurate Crosses - home
## 1053             Ball Possession % - away                      Attacks - away
## 1054             Ball Possession % - home                      Attacks - away
## 1062               Counter Attacks - home                      Attacks - away
## 1063             Dangerous Attacks - away                      Attacks - away
## 1071                 Goal Attempts - away                      Attacks - away
## 1074                    Goal Kicks - home                      Attacks - away
## 1085                    Key Passes - away                      Attacks - away
## 1091                        Passes - away                      Attacks - away
## 1103               Shots Insidebox - away                      Attacks - away
## 1105              Shots Off Target - away                      Attacks - away
## 1111                   Shots Total - away                      Attacks - away
## 1113                 Substitutions - away                      Attacks - away
## 1121             Successful Passes - away                      Attacks - away
## 1123  Successful Passes Percentage - away                      Attacks - away
## 1124  Successful Passes Percentage - home                      Attacks - away
## 1129                 Total Crosses - away                      Attacks - away
## 1157             Ball Possession % - away                      Attacks - home
## 1158             Ball Possession % - home                      Attacks - home
## 1167             Dangerous Attacks - away                      Attacks - home
## 1168             Dangerous Attacks - home                      Attacks - home
## 1178                    Goal Kicks - home                      Attacks - home
## 1196                        Passes - home                      Attacks - home
## 1215                   Shots Total - away                      Attacks - home
## 1226             Successful Passes - home                      Attacks - home
## 1228  Successful Passes Percentage - home                      Attacks - home
## 1234                 Total Crosses - home                      Attacks - home
## 1259                       Attacks - away            Ball Possession % - away
## 1260                       Attacks - home            Ball Possession % - away
## 1262             Ball Possession % - home            Ball Possession % - away
## 1267                       Corners - away            Ball Possession % - away
## 1268                       Corners - home            Ball Possession % - away
## 1270               Counter Attacks - home            Ball Possession % - away
## 1271             Dangerous Attacks - away            Ball Possession % - away
## 1282                    Goal Kicks - home            Ball Possession % - away
## 1293                    Key Passes - away            Ball Possession % - away
## 1299                        Passes - away            Ball Possession % - away
## 1311               Shots Insidebox - away            Ball Possession % - away
## 1313              Shots Off Target - away            Ball Possession % - away
## 1319                   Shots Total - away            Ball Possession % - away
## 1321                 Substitutions - away            Ball Possession % - away
## 1329             Successful Passes - away            Ball Possession % - away
## 1330             Successful Passes - home            Ball Possession % - away
## 1331  Successful Passes Percentage - away            Ball Possession % - away
## 1332  Successful Passes Percentage - home            Ball Possession % - away
## 1337                 Total Crosses - away            Ball Possession % - away
## 1338                 Total Crosses - home            Ball Possession % - away
## 1363                       Attacks - away            Ball Possession % - home
## 1364                       Attacks - home            Ball Possession % - home
## 1365             Ball Possession % - away            Ball Possession % - home
## 1371                       Corners - away            Ball Possession % - home
## 1372                       Corners - home            Ball Possession % - home
## 1374               Counter Attacks - home            Ball Possession % - home
## 1375             Dangerous Attacks - away            Ball Possession % - home
## 1386                    Goal Kicks - home            Ball Possession % - home
## 1397                    Key Passes - away            Ball Possession % - home
## 1403                        Passes - away            Ball Possession % - home
## 1415               Shots Insidebox - away            Ball Possession % - home
## 1417              Shots Off Target - away            Ball Possession % - home
## 1423                   Shots Total - away            Ball Possession % - home
## 1425                 Substitutions - away            Ball Possession % - home
## 1433             Successful Passes - away            Ball Possession % - home
## 1434             Successful Passes - home            Ball Possession % - home
## 1435  Successful Passes Percentage - away            Ball Possession % - home
## 1436  Successful Passes Percentage - home            Ball Possession % - home
## 1441                 Total Crosses - away            Ball Possession % - home
## 1442                 Total Crosses - home            Ball Possession % - home
## 1474                    Challenges - home                    Ball Safe - away
## 1500                 Interceptions - home                    Ball Safe - away
## 1601                      Injuries - away                    Ball Safe - home
## 1612                        Passes - home                    Ball Safe - home
## 1636           Successful Dribbles - home                    Ball Safe - home
## 1642             Successful Passes - home                    Ball Safe - home
## 1667                               second                   Challenges - away
## 1707                 Interceptions - away                   Challenges - away
## 1752                      Throwins - home                   Challenges - away
## 1783                     Ball Safe - away                   Challenges - home
## 1811                 Interceptions - away                   Challenges - home
## 1812                 Interceptions - home                   Challenges - home
## 1885             Ball Possession % - away                      Corners - away
## 1886             Ball Possession % - home                      Corners - away
## 1892                       Corners - home                      Corners - away
## 1907                         Goals - away                      Corners - away
## 1922                      Offsides - home                      Corners - away
## 1923                        Passes - away                      Corners - away
## 1930                         Saves - home                      Corners - away
## 1953             Successful Passes - away                      Corners - away
## 1956  Successful Passes Percentage - home                      Corners - away
## 1961                 Total Crosses - away                      Corners - away
## 1962                 Total Crosses - home                      Corners - away
## 1989             Ball Possession % - away                      Corners - home
## 1990             Ball Possession % - home                      Corners - home
## 1995                       Corners - away                      Corners - home
## 2011                         Goals - away                      Corners - home
## 2026                      Offsides - home                      Corners - home
## 2027                        Passes - away                      Corners - home
## 2034                         Saves - home                      Corners - home
## 2040               Shots Insidebox - home                      Corners - home
## 2049                 Substitutions - away                      Corners - home
## 2057             Successful Passes - away                      Corners - home
## 2059  Successful Passes Percentage - away                      Corners - home
## 2060  Successful Passes Percentage - home                      Corners - home
## 2065                 Total Crosses - away                      Corners - home
## 2066                 Total Crosses - home                      Corners - home
## 2195                       Attacks - away              Counter Attacks - home
## 2197             Ball Possession % - away              Counter Attacks - home
## 2198             Ball Possession % - home              Counter Attacks - home
## 2207             Dangerous Attacks - away              Counter Attacks - home
## 2218                    Goal Kicks - home              Counter Attacks - home
## 2229                    Key Passes - away              Counter Attacks - home
## 2235                        Passes - away              Counter Attacks - home
## 2246                 Shots Blocked - home              Counter Attacks - home
## 2247               Shots Insidebox - away              Counter Attacks - home
## 2249              Shots Off Target - away              Counter Attacks - home
## 2255                   Shots Total - away              Counter Attacks - home
## 2259           Successful Dribbles - away              Counter Attacks - home
## 2265             Successful Passes - away              Counter Attacks - home
## 2267  Successful Passes Percentage - away              Counter Attacks - home
## 2268  Successful Passes Percentage - home              Counter Attacks - home
## 2299                       Attacks - away            Dangerous Attacks - away
## 2300                       Attacks - home            Dangerous Attacks - away
## 2301             Ball Possession % - away            Dangerous Attacks - away
## 2302             Ball Possession % - home            Dangerous Attacks - away
## 2310               Counter Attacks - home            Dangerous Attacks - away
## 2319                 Goal Attempts - away            Dangerous Attacks - away
## 2322                    Goal Kicks - home            Dangerous Attacks - away
## 2333                    Key Passes - away            Dangerous Attacks - away
## 2339                        Passes - away            Dangerous Attacks - away
## 2349                 Shots Blocked - away            Dangerous Attacks - away
## 2351               Shots Insidebox - away            Dangerous Attacks - away
## 2353              Shots Off Target - away            Dangerous Attacks - away
## 2359                   Shots Total - away            Dangerous Attacks - away
## 2363           Successful Dribbles - away            Dangerous Attacks - away
## 2369             Successful Passes - away            Dangerous Attacks - away
## 2371  Successful Passes Percentage - away            Dangerous Attacks - away
## 2372  Successful Passes Percentage - home            Dangerous Attacks - away
## 2377                 Total Crosses - away            Dangerous Attacks - away
## 2404                       Attacks - home            Dangerous Attacks - home
## 2425                    Goal Kicks - away            Dangerous Attacks - home
## 2444                        Passes - home            Dangerous Attacks - home
## 2474             Successful Passes - home            Dangerous Attacks - home
## 2476  Successful Passes Percentage - home            Dangerous Attacks - home
## 2676           Successful Dribbles - home             Dribble Attempts - home
## 2750                    Key Passes - home                        Fouls - away
## 2768               Shots Insidebox - home                        Fouls - away
## 2772               Shots On Target - home                        Fouls - away
## 2776                   Shots Total - home                        Fouls - away
## 2777                 Substitutions - away                        Fouls - away
## 2784      Successful Interceptions - home                        Fouls - away
## 2837                    Free Kicks - away                        Fouls - home
## 2861                     Penalties - away                        Fouls - home
## 2940                         Fouls - home                   Free Kicks - away
## 2965                     Penalties - away                   Free Kicks - away
## 3054                       Headers - home                   Free Kicks - home
## 3078                 Shots Blocked - home                   Free Kicks - home
## 3094            Successful Headers - home                   Free Kicks - home
## 3131                       Attacks - away                Goal Attempts - away
## 3143             Dangerous Attacks - away                Goal Attempts - away
## 3154                    Goal Kicks - home                Goal Attempts - away
## 3165                    Key Passes - away                Goal Attempts - away
## 3181                 Shots Blocked - away                Goal Attempts - away
## 3189              Shots Outsidebox - away                Goal Attempts - away
## 3191                   Shots Total - away                Goal Attempts - away
## 3204  Successful Passes Percentage - home                Goal Attempts - away
## 3209                 Total Crosses - away                Goal Attempts - away
## 3329                           fixture_id                   Goal Kicks - away
## 3352             Dangerous Attacks - home                   Goal Kicks - away
## 3394              Shots Off Target - home                   Goal Kicks - away
## 3424                               P_home                   Goal Kicks - away
## 3429                          P_home_norm                   Goal Kicks - away
## 3443                       Attacks - away                   Goal Kicks - home
## 3444                       Attacks - home                   Goal Kicks - home
## 3445             Ball Possession % - away                   Goal Kicks - home
## 3446             Ball Possession % - home                   Goal Kicks - home
## 3454               Counter Attacks - home                   Goal Kicks - home
## 3455             Dangerous Attacks - away                   Goal Kicks - home
## 3463                 Goal Attempts - away                   Goal Kicks - home
## 3477                    Key Passes - away                   Goal Kicks - home
## 3483                        Passes - away                   Goal Kicks - home
## 3493                 Shots Blocked - away                   Goal Kicks - home
## 3495               Shots Insidebox - away                   Goal Kicks - home
## 3497              Shots Off Target - away                   Goal Kicks - home
## 3503                   Shots Total - away                   Goal Kicks - home
## 3507           Successful Dribbles - away                   Goal Kicks - home
## 3513             Successful Passes - away                   Goal Kicks - home
## 3515  Successful Passes Percentage - away                   Goal Kicks - home
## 3516  Successful Passes Percentage - home                   Goal Kicks - home
## 3521                 Total Crosses - away                   Goal Kicks - home
## 3522                 Total Crosses - home                   Goal Kicks - home
## 3555                       Corners - away                        Goals - away
## 3556                       Corners - home                        Goals - away
## 3594                         Saves - home                        Goals - away
## 3878                    Free Kicks - home                      Headers - home
## 3895                   Long Passes - away                      Headers - home
## 3926            Successful Headers - home                      Headers - home
## 4176                     Ball Safe - home                     Injuries - away
## 4212                        Passes - home                     Injuries - away
## 4236           Successful Dribbles - home                     Injuries - away
## 4265                           fixture_id                     Injuries - home
## 4269                                    2                     Injuries - home
## 4270                                    X                     Injuries - home
## 4318                     Penalties - home                     Injuries - home
## 4371                               second                Interceptions - away
## 4385                    Challenges - away                Interceptions - away
## 4386                    Challenges - home                Interceptions - away
## 4456                      Throwins - home                Interceptions - away
## 4487                     Ball Safe - away                Interceptions - home
## 4490                    Challenges - home                Interceptions - home
## 4587                       Attacks - away                   Key Passes - away
## 4589             Ball Possession % - away                   Key Passes - away
## 4590             Ball Possession % - home                   Key Passes - away
## 4598               Counter Attacks - home                   Key Passes - away
## 4599             Dangerous Attacks - away                   Key Passes - away
## 4607                 Goal Attempts - away                   Key Passes - away
## 4610                    Goal Kicks - home                   Key Passes - away
## 4638                 Shots Blocked - home                   Key Passes - away
## 4639               Shots Insidebox - away                   Key Passes - away
## 4641              Shots Off Target - away                   Key Passes - away
## 4647                   Shots Total - away                   Key Passes - away
## 4651           Successful Dribbles - away                   Key Passes - away
## 4657             Successful Passes - away                   Key Passes - away
## 4659  Successful Passes Percentage - away                   Key Passes - away
## 4660  Successful Passes Percentage - home                   Key Passes - away
## 4707                         Fouls - away                   Key Passes - home
## 4737                         Saves - away                   Key Passes - home
## 4744               Shots Insidebox - home                   Key Passes - home
## 4748               Shots On Target - home                   Key Passes - home
## 4752                   Shots Total - home                   Key Passes - home
## 4753                 Substitutions - away                   Key Passes - home
## 4760      Successful Interceptions - home                   Key Passes - home
## 4772                   Yellowcards - home                   Key Passes - home
## 4822                       Headers - home                  Long Passes - away
## 4862            Successful Headers - home                  Long Passes - away
## 4958              Shots Outsidebox - home                  Long Passes - home
## 4973                       Tackles - away                  Long Passes - home
## 4984                               P_home                  Long Passes - home
## 4985                               P_away                  Long Passes - home
## 4987                  P_home_minus_P_away                  Long Passes - home
## 4989                          P_home_norm                  Long Passes - home
## 4991                          P_away_norm                  Long Passes - home
## 5115                       Corners - away                     Offsides - home
## 5116                       Corners - home                     Offsides - home
## 5154                         Saves - home                     Offsides - home
## 5185                 Total Crosses - away                     Offsides - home
## 5211                       Attacks - away                       Passes - away
## 5213             Ball Possession % - away                       Passes - away
## 5214             Ball Possession % - home                       Passes - away
## 5219                       Corners - away                       Passes - away
## 5220                       Corners - home                       Passes - away
## 5222               Counter Attacks - home                       Passes - away
## 5223             Dangerous Attacks - away                       Passes - away
## 5234                    Goal Kicks - home                       Passes - away
## 5263               Shots Insidebox - away                       Passes - away
## 5265              Shots Off Target - away                       Passes - away
## 5273                 Substitutions - away                       Passes - away
## 5281             Successful Passes - away                       Passes - away
## 5283  Successful Passes Percentage - away                       Passes - away
## 5284  Successful Passes Percentage - home                       Passes - away
## 5289                 Total Crosses - away                       Passes - away
## 5290                 Total Crosses - home                       Passes - away
## 5316                       Attacks - home                       Passes - home
## 5320                     Ball Safe - home                       Passes - home
## 5328             Dangerous Attacks - home                       Passes - home
## 5345                      Injuries - away                       Passes - home
## 5365                 Shots Blocked - away                       Passes - home
## 5386             Successful Passes - home                       Passes - home
## 5388  Successful Passes Percentage - home                       Passes - home
## 5394                 Total Crosses - home                       Passes - home
## 5436                         Fouls - home                    Penalties - away
## 5437                    Free Kicks - away                    Penalties - away
## 5513                           fixture_id                    Penalties - home
## 5517                                    2                    Penalties - home
## 5518                                    X                    Penalties - home
## 5554                      Injuries - home                    Penalties - home
## 5832              Accurate Crosses - home                        Saves - away
## 5870                    Key Passes - home                        Saves - away
## 5888               Shots Insidebox - home                        Saves - away
## 5892               Shots On Target - home                        Saves - away
## 5896                   Shots Total - home                        Saves - away
## 5897                 Substitutions - away                        Saves - away
## 5904      Successful Interceptions - home                        Saves - away
## 5907  Successful Passes Percentage - away                        Saves - away
## 5915                   Yellowcards - away                        Saves - away
## 5947                       Corners - away                        Saves - home
## 5948                       Corners - home                        Saves - home
## 5963                         Goals - away                        Saves - home
## 5978                      Offsides - home                        Saves - home
## 6263             Dangerous Attacks - away                Shots Blocked - away
## 6271                 Goal Attempts - away                Shots Blocked - away
## 6274                    Goal Kicks - home                Shots Blocked - away
## 6292                        Passes - home                Shots Blocked - away
## 6309              Shots Outsidebox - away                Shots Blocked - away
## 6311                   Shots Total - away                Shots Blocked - away
## 6315           Successful Dribbles - away                Shots Blocked - away
## 6322             Successful Passes - home                Shots Blocked - away
## 6324  Successful Passes Percentage - home                Shots Blocked - away
## 6329                 Total Crosses - away                Shots Blocked - away
## 6366               Counter Attacks - home                Shots Blocked - home
## 6374                    Free Kicks - home                Shots Blocked - home
## 6389                    Key Passes - away                Shots Blocked - home
## 6407               Shots Insidebox - away                Shots Blocked - home
## 6409              Shots Off Target - away                Shots Blocked - home
## 6417                 Substitutions - away                Shots Blocked - home
## 6427  Successful Passes Percentage - away                Shots Blocked - home
## 6436                   Yellowcards - home                Shots Blocked - home
## 6459                       Attacks - away              Shots Insidebox - away
## 6461             Ball Possession % - away              Shots Insidebox - away
## 6462             Ball Possession % - home              Shots Insidebox - away
## 6470               Counter Attacks - home              Shots Insidebox - away
## 6471             Dangerous Attacks - away              Shots Insidebox - away
## 6482                    Goal Kicks - home              Shots Insidebox - away
## 6493                    Key Passes - away              Shots Insidebox - away
## 6499                        Passes - away              Shots Insidebox - away
## 6510                 Shots Blocked - home              Shots Insidebox - away
## 6513              Shots Off Target - away              Shots Insidebox - away
## 6519                   Shots Total - away              Shots Insidebox - away
## 6521                 Substitutions - away              Shots Insidebox - away
## 6523           Successful Dribbles - away              Shots Insidebox - away
## 6529             Successful Passes - away              Shots Insidebox - away
## 6531  Successful Passes Percentage - away              Shots Insidebox - away
## 6532  Successful Passes Percentage - home              Shots Insidebox - away
## 6537                 Total Crosses - away              Shots Insidebox - away
## 6540                   Yellowcards - home              Shots Insidebox - away
## 6560              Accurate Crosses - home              Shots Insidebox - home
## 6572                       Corners - home              Shots Insidebox - home
## 6579                         Fouls - away              Shots Insidebox - home
## 6598                    Key Passes - home              Shots Insidebox - home
## 6609                         Saves - away              Shots Insidebox - home
## 6620               Shots On Target - home              Shots Insidebox - home
## 6624                   Shots Total - home              Shots Insidebox - home
## 6625                 Substitutions - away              Shots Insidebox - home
## 6632      Successful Interceptions - home              Shots Insidebox - home
## 6667                       Attacks - away             Shots Off Target - away
## 6669             Ball Possession % - away             Shots Off Target - away
## 6670             Ball Possession % - home             Shots Off Target - away
## 6678               Counter Attacks - home             Shots Off Target - away
## 6679             Dangerous Attacks - away             Shots Off Target - away
## 6690                    Goal Kicks - home             Shots Off Target - away
## 6701                    Key Passes - away             Shots Off Target - away
## 6707                        Passes - away             Shots Off Target - away
## 6718                 Shots Blocked - home             Shots Off Target - away
## 6719               Shots Insidebox - away             Shots Off Target - away
## 6727                   Shots Total - away             Shots Off Target - away
## 6731           Successful Dribbles - away             Shots Off Target - away
## 6737             Successful Passes - away             Shots Off Target - away
## 6739  Successful Passes Percentage - away             Shots Off Target - away
## 6793                    Goal Kicks - away             Shots Off Target - home
## 6976              Accurate Crosses - home              Shots On Target - home
## 6995                         Fouls - away              Shots On Target - home
## 7014                    Key Passes - home              Shots On Target - home
## 7025                         Saves - away              Shots On Target - home
## 7032               Shots Insidebox - home              Shots On Target - home
## 7040                   Shots Total - home              Shots On Target - home
## 7041                 Substitutions - away              Shots On Target - home
## 7048      Successful Interceptions - home              Shots On Target - home
## 7103                 Goal Attempts - away             Shots Outsidebox - away
## 7133                 Shots Blocked - away             Shots Outsidebox - away
## 7224                   Long Passes - home             Shots Outsidebox - home
## 7291                       Attacks - away                  Shots Total - away
## 7292                       Attacks - home                  Shots Total - away
## 7293             Ball Possession % - away                  Shots Total - away
## 7294             Ball Possession % - home                  Shots Total - away
## 7302               Counter Attacks - home                  Shots Total - away
## 7303             Dangerous Attacks - away                  Shots Total - away
## 7311                 Goal Attempts - away                  Shots Total - away
## 7314                    Goal Kicks - home                  Shots Total - away
## 7325                    Key Passes - away                  Shots Total - away
## 7341                 Shots Blocked - away                  Shots Total - away
## 7343               Shots Insidebox - away                  Shots Total - away
## 7345              Shots Off Target - away                  Shots Total - away
## 7355           Successful Dribbles - away                  Shots Total - away
## 7361             Successful Passes - away                  Shots Total - away
## 7364  Successful Passes Percentage - home                  Shots Total - away
## 7369                 Total Crosses - away                  Shots Total - away
## 7411                         Fouls - away                  Shots Total - home
## 7430                    Key Passes - home                  Shots Total - home
## 7441                         Saves - away                  Shots Total - home
## 7448               Shots Insidebox - home                  Shots Total - home
## 7452               Shots On Target - home                  Shots Total - home
## 7457                 Substitutions - away                  Shots Total - home
## 7464      Successful Interceptions - home                  Shots Total - home
## 7467  Successful Passes Percentage - away                  Shots Total - home
## 7476                   Yellowcards - home                  Shots Total - home
## 7499                       Attacks - away                Substitutions - away
## 7501             Ball Possession % - away                Substitutions - away
## 7502             Ball Possession % - home                Substitutions - away
## 7508                       Corners - home                Substitutions - away
## 7515                         Fouls - away                Substitutions - away
## 7534                    Key Passes - home                Substitutions - away
## 7539                        Passes - away                Substitutions - away
## 7545                         Saves - away                Substitutions - away
## 7550                 Shots Blocked - home                Substitutions - away
## 7551               Shots Insidebox - away                Substitutions - away
## 7552               Shots Insidebox - home                Substitutions - away
## 7556               Shots On Target - home                Substitutions - away
## 7560                   Shots Total - home                Substitutions - away
## 7569             Successful Passes - away                Substitutions - away
## 7571  Successful Passes Percentage - away                Substitutions - away
## 7572  Successful Passes Percentage - home                Substitutions - away
## 7578                 Total Crosses - home                Substitutions - away
## 7579                   Yellowcards - away                Substitutions - away
## 7580                   Yellowcards - home                Substitutions - away
## 7718               Counter Attacks - home          Successful Dribbles - away
## 7719             Dangerous Attacks - away          Successful Dribbles - away
## 7730                    Goal Kicks - home          Successful Dribbles - away
## 7741                    Key Passes - away          Successful Dribbles - away
## 7757                 Shots Blocked - away          Successful Dribbles - away
## 7759               Shots Insidebox - away          Successful Dribbles - away
## 7761              Shots Off Target - away          Successful Dribbles - away
## 7767                   Shots Total - away          Successful Dribbles - away
## 7780  Successful Passes Percentage - home          Successful Dribbles - away
## 7816                     Ball Safe - home          Successful Dribbles - home
## 7826              Dribble Attempts - home          Successful Dribbles - home
## 7841                      Injuries - away          Successful Dribbles - home
## 8038                    Free Kicks - home           Successful Headers - home
## 8046                       Headers - home           Successful Headers - home
## 8055                   Long Passes - away           Successful Headers - home
## 8197                       Tackles - away     Successful Interceptions - away
## 8243                         Fouls - away     Successful Interceptions - home
## 8262                    Key Passes - home     Successful Interceptions - home
## 8273                         Saves - away     Successful Interceptions - home
## 8280               Shots Insidebox - home     Successful Interceptions - home
## 8284               Shots On Target - home     Successful Interceptions - home
## 8288                   Shots Total - home     Successful Interceptions - home
## 8331                       Attacks - away            Successful Passes - away
## 8333             Ball Possession % - away            Successful Passes - away
## 8334             Ball Possession % - home            Successful Passes - away
## 8339                       Corners - away            Successful Passes - away
## 8340                       Corners - home            Successful Passes - away
## 8342               Counter Attacks - home            Successful Passes - away
## 8343             Dangerous Attacks - away            Successful Passes - away
## 8354                    Goal Kicks - home            Successful Passes - away
## 8365                    Key Passes - away            Successful Passes - away
## 8371                        Passes - away            Successful Passes - away
## 8383               Shots Insidebox - away            Successful Passes - away
## 8385              Shots Off Target - away            Successful Passes - away
## 8391                   Shots Total - away            Successful Passes - away
## 8393                 Substitutions - away            Successful Passes - away
## 8403  Successful Passes Percentage - away            Successful Passes - away
## 8404  Successful Passes Percentage - home            Successful Passes - away
## 8409                 Total Crosses - away            Successful Passes - away
## 8410                 Total Crosses - home            Successful Passes - away
## 8436                       Attacks - home            Successful Passes - home
## 8437             Ball Possession % - away            Successful Passes - home
## 8438             Ball Possession % - home            Successful Passes - home
## 8440                     Ball Safe - home            Successful Passes - home
## 8448             Dangerous Attacks - home            Successful Passes - home
## 8476                        Passes - home            Successful Passes - home
## 8485                 Shots Blocked - away            Successful Passes - home
## 8508  Successful Passes Percentage - home            Successful Passes - home
## 8513                 Total Crosses - away            Successful Passes - home
## 8514                 Total Crosses - home            Successful Passes - home
## 8539                       Attacks - away Successful Passes Percentage - away
## 8541             Ball Possession % - away Successful Passes Percentage - away
## 8542             Ball Possession % - home Successful Passes Percentage - away
## 8548                       Corners - home Successful Passes Percentage - away
## 8550               Counter Attacks - home Successful Passes Percentage - away
## 8551             Dangerous Attacks - away Successful Passes Percentage - away
## 8562                    Goal Kicks - home Successful Passes Percentage - away
## 8573                    Key Passes - away Successful Passes Percentage - away
## 8579                        Passes - away Successful Passes Percentage - away
## 8585                         Saves - away Successful Passes Percentage - away
## 8590                 Shots Blocked - home Successful Passes Percentage - away
## 8591               Shots Insidebox - away Successful Passes Percentage - away
## 8593              Shots Off Target - away Successful Passes Percentage - away
## 8600                   Shots Total - home Successful Passes Percentage - away
## 8601                 Substitutions - away Successful Passes Percentage - away
## 8609             Successful Passes - away Successful Passes Percentage - away
## 8612  Successful Passes Percentage - home Successful Passes Percentage - away
## 8618                 Total Crosses - home Successful Passes Percentage - away
## 8619                   Yellowcards - away Successful Passes Percentage - away
## 8620                   Yellowcards - home Successful Passes Percentage - away
## 8643                       Attacks - away Successful Passes Percentage - home
## 8644                       Attacks - home Successful Passes Percentage - home
## 8645             Ball Possession % - away Successful Passes Percentage - home
## 8646             Ball Possession % - home Successful Passes Percentage - home
## 8651                       Corners - away Successful Passes Percentage - home
## 8652                       Corners - home Successful Passes Percentage - home
## 8654               Counter Attacks - home Successful Passes Percentage - home
## 8655             Dangerous Attacks - away Successful Passes Percentage - home
## 8656             Dangerous Attacks - home Successful Passes Percentage - home
## 8663                 Goal Attempts - away Successful Passes Percentage - home
## 8666                    Goal Kicks - home Successful Passes Percentage - home
## 8677                    Key Passes - away Successful Passes Percentage - home
## 8683                        Passes - away Successful Passes Percentage - home
## 8684                        Passes - home Successful Passes Percentage - home
## 8693                 Shots Blocked - away Successful Passes Percentage - home
## 8695               Shots Insidebox - away Successful Passes Percentage - home
## 8703                   Shots Total - away Successful Passes Percentage - home
## 8705                 Substitutions - away Successful Passes Percentage - home
## 8707           Successful Dribbles - away Successful Passes Percentage - home
## 8713             Successful Passes - away Successful Passes Percentage - home
## 8714             Successful Passes - home Successful Passes Percentage - home
## 8715  Successful Passes Percentage - away Successful Passes Percentage - home
## 8721                 Total Crosses - away Successful Passes Percentage - home
## 8722                 Total Crosses - home Successful Passes Percentage - home
## 8784                   Long Passes - home                      Tackles - away
## 8815      Successful Interceptions - away                      Tackles - away
## 8833                               P_away                      Tackles - away
## 8839                          P_away_norm                      Tackles - away
## 9065                    Challenges - away                     Throwins - home
## 9091                 Interceptions - away                     Throwins - home
## 9163                       Attacks - away                Total Crosses - away
## 9165             Ball Possession % - away                Total Crosses - away
## 9166             Ball Possession % - home                Total Crosses - away
## 9171                       Corners - away                Total Crosses - away
## 9172                       Corners - home                Total Crosses - away
## 9175             Dangerous Attacks - away                Total Crosses - away
## 9183                 Goal Attempts - away                Total Crosses - away
## 9186                    Goal Kicks - home                Total Crosses - away
## 9202                      Offsides - home                Total Crosses - away
## 9203                        Passes - away                Total Crosses - away
## 9213                 Shots Blocked - away                Total Crosses - away
## 9215               Shots Insidebox - away                Total Crosses - away
## 9223                   Shots Total - away                Total Crosses - away
## 9233             Successful Passes - away                Total Crosses - away
## 9234             Successful Passes - home                Total Crosses - away
## 9236  Successful Passes Percentage - home                Total Crosses - away
## 9242                 Total Crosses - home                Total Crosses - away
## 9268                       Attacks - home                Total Crosses - home
## 9269             Ball Possession % - away                Total Crosses - home
## 9270             Ball Possession % - home                Total Crosses - home
## 9275                       Corners - away                Total Crosses - home
## 9276                       Corners - home                Total Crosses - home
## 9290                    Goal Kicks - home                Total Crosses - home
## 9307                        Passes - away                Total Crosses - home
## 9308                        Passes - home                Total Crosses - home
## 9329                 Substitutions - away                Total Crosses - home
## 9337             Successful Passes - away                Total Crosses - home
## 9338             Successful Passes - home                Total Crosses - home
## 9339  Successful Passes Percentage - away                Total Crosses - home
## 9340  Successful Passes Percentage - home                Total Crosses - home
## 9345                 Total Crosses - away                Total Crosses - home
## 9368              Accurate Crosses - home                  Yellowcards - away
## 9417                         Saves - away                  Yellowcards - away
## 9433                 Substitutions - away                  Yellowcards - away
## 9443  Successful Passes Percentage - away                  Yellowcards - away
## 9510                    Key Passes - home                  Yellowcards - home
## 9526                 Shots Blocked - home                  Yellowcards - home
## 9527               Shots Insidebox - away                  Yellowcards - home
## 9536                   Shots Total - home                  Yellowcards - home
## 9537                 Substitutions - away                  Yellowcards - home
## 9547  Successful Passes Percentage - away                  Yellowcards - home
## 9881                           fixture_id                              P_home
## 9913                    Goal Kicks - away                              P_home
## 9928                   Long Passes - home                              P_home
## 9977                               P_away                              P_home
## 9979                  P_home_minus_P_away                              P_home
## 9981                          P_home_norm                              P_home
## 9983                          P_away_norm                              P_home
## 9988                                    1                              P_away
## 10032                  Long Passes - home                              P_away
## 10069                      Tackles - away                              P_away
## 10080                              P_home                              P_away
## 10083                 P_home_minus_P_away                              P_away
## 10085                         P_home_norm                              P_away
## 10087                         P_away_norm                              P_away
## 10190                         P_draw_norm                              P_draw
## 10192                      predicted_draw                              P_draw
## 10240                  Long Passes - home                 P_home_minus_P_away
## 10288                              P_home                 P_home_minus_P_away
## 10289                              P_away                 P_home_minus_P_away
## 10293                         P_home_norm                 P_home_minus_P_away
## 10295                         P_away_norm                 P_home_minus_P_away
## 10401                          fixture_id                         P_home_norm
## 10433                   Goal Kicks - away                         P_home_norm
## 10448                  Long Passes - home                         P_home_norm
## 10496                              P_home                         P_home_norm
## 10497                              P_away                         P_home_norm
## 10499                 P_home_minus_P_away                         P_home_norm
## 10503                         P_away_norm                         P_home_norm
## 10602                              P_draw                         P_draw_norm
## 10608                      predicted_draw                         P_draw_norm
## 10612                                   1                         P_away_norm
## 10656                  Long Passes - home                         P_away_norm
## 10693                      Tackles - away                         P_away_norm
## 10704                              P_home                         P_away_norm
## 10705                              P_away                         P_away_norm
## 10707                 P_home_minus_P_away                         P_away_norm
## 10709                         P_home_norm                         P_away_norm
## 10810                              P_draw                      predicted_draw
## 10814                         P_draw_norm                      predicted_draw
##             Freq
## 5      0.8745462
## 33     0.8652475
## 42    -0.8657527
## 54     0.8657527
## 96     0.8916220
## 101    0.8916220
## 225   -0.9438404
## 251   -0.9345203
## 409    0.8577641
## 415    0.8577641
## 417    0.8745462
## 422    0.9046007
## 458   -0.9996599
## 470    0.9996599
## 525    0.9046007
## 562   -0.9119251
## 574    0.9119251
## 785    0.8690829
## 792    0.8594506
## 796    0.8795371
## 819    0.9105507
## 1053   0.9331640
## 1054  -0.9331640
## 1062   0.9239559
## 1063   0.9554754
## 1071   0.8590615
## 1074   0.9443209
## 1085   0.9399353
## 1091   0.9527355
## 1103   0.9485002
## 1105   0.9341039
## 1111   0.9292023
## 1113  -0.8723909
## 1121   0.9582330
## 1123   0.9113811
## 1124  -0.9307463
## 1129   0.9046146
## 1157  -0.8903791
## 1158   0.8903791
## 1167  -0.8754230
## 1168   0.9483605
## 1178  -0.8747841
## 1196   0.9529156
## 1215  -0.8553695
## 1226   0.9708343
## 1228   0.9262470
## 1234   0.9147621
## 1259   0.9331640
## 1260  -0.8903791
## 1262  -1.0000000
## 1267   0.9029211
## 1268  -0.9380456
## 1270   0.8555862
## 1271   0.9311931
## 1282   0.9457371
## 1293   0.8539070
## 1299   0.9552863
## 1311   0.9296235
## 1313   0.8573651
## 1319   0.9028521
## 1321  -0.9073333
## 1329   0.9633340
## 1330  -0.8845332
## 1331   0.9428282
## 1332  -0.9880744
## 1337   0.9361662
## 1338  -0.9532363
## 1363  -0.9331640
## 1364   0.8903791
## 1365  -1.0000000
## 1371  -0.9029211
## 1372   0.9380456
## 1374  -0.8555862
## 1375  -0.9311931
## 1386  -0.9457371
## 1397  -0.8539070
## 1403  -0.9552863
## 1415  -0.9296235
## 1417  -0.8573651
## 1423  -0.9028521
## 1425   0.9073333
## 1433  -0.9633340
## 1434   0.8845332
## 1435  -0.9428282
## 1436   0.9880744
## 1441  -0.9361662
## 1442   0.9532363
## 1474   0.8905176
## 1500   0.9165761
## 1601   0.9692010
## 1612   0.8703329
## 1636   0.8904250
## 1642   0.8528933
## 1667  -0.9438404
## 1707   0.9936542
## 1752   0.8535079
## 1783   0.8905176
## 1811   0.8535568
## 1812   0.9804921
## 1885   0.9029211
## 1886  -0.9029211
## 1892  -0.9469232
## 1907  -0.8719981
## 1922   0.9429304
## 1923   0.8669468
## 1930   0.9365492
## 1953   0.8641072
## 1956  -0.8920524
## 1961   0.9323863
## 1962  -0.8799688
## 1989  -0.9380456
## 1990   0.9380456
## 1995  -0.9469232
## 2011   0.8708211
## 2026  -0.8746266
## 2027  -0.9266830
## 2034  -0.9148891
## 2040   0.8545596
## 2049   0.8575380
## 2057  -0.9237960
## 2059  -0.8563477
## 2060   0.8985082
## 2065  -0.8640713
## 2066   0.9320170
## 2195   0.9239559
## 2197   0.8555862
## 2198  -0.8555862
## 2207   0.9073550
## 2218   0.9264997
## 2229   0.9485199
## 2235   0.8777409
## 2246  -0.8712226
## 2247   0.8880383
## 2249   0.9083823
## 2255   0.8717703
## 2259   0.8912864
## 2265   0.8912617
## 2267   0.8897374
## 2268  -0.8652567
## 2299   0.9554754
## 2300  -0.8754230
## 2301   0.9311931
## 2302  -0.9311931
## 2310   0.9073550
## 2319   0.9382655
## 2322   0.9659524
## 2333   0.9543662
## 2339   0.8776421
## 2349   0.9358448
## 2351   0.9277969
## 2353   0.8966097
## 2359   0.9874107
## 2363   0.9076898
## 2369   0.8934819
## 2371   0.8641769
## 2372  -0.9599913
## 2377   0.9499108
## 2404   0.9483605
## 2425   0.8797869
## 2444   0.9444970
## 2474   0.9400409
## 2476   0.8516399
## 2676   0.8505623
## 2750   0.9524194
## 2768   0.9204578
## 2772   0.9150332
## 2776   0.9264875
## 2777   0.8698684
## 2784   0.9641935
## 2837   0.9466428
## 2861   0.9176379
## 2940   0.9466428
## 2965   0.8845826
## 3054   0.8667377
## 3078   0.8546523
## 3094   0.8861449
## 3131   0.8590615
## 3143   0.9382655
## 3154   0.8732501
## 3165   0.8933450
## 3181   0.9130218
## 3189   0.8567980
## 3191   0.9476673
## 3204  -0.8574849
## 3209   0.8813586
## 3329   0.8652475
## 3352   0.8797869
## 3394   0.8822690
## 3424   0.8759197
## 3429   0.8759197
## 3443   0.9443209
## 3444  -0.8747841
## 3445   0.9457371
## 3446  -0.9457371
## 3454   0.9264997
## 3455   0.9659524
## 3463   0.8732501
## 3477   0.9403474
## 3483   0.9079713
## 3493   0.8895260
## 3495   0.9327395
## 3497   0.9061749
## 3503   0.9440636
## 3507   0.9511419
## 3513   0.9211390
## 3515   0.9058417
## 3516  -0.9613752
## 3521   0.9089071
## 3522  -0.8990171
## 3555  -0.8719981
## 3556   0.8708211
## 3594  -0.8545301
## 3878   0.8667377
## 3895   0.8615998
## 3926   0.9511375
## 4176   0.9692010
## 4212   0.8509430
## 4236   0.8713148
## 4265  -0.8657527
## 4269  -0.9996599
## 4270  -0.9119251
## 4318  -1.0000000
## 4371  -0.9345203
## 4385   0.9936542
## 4386   0.8535568
## 4456   0.8641738
## 4487   0.9165761
## 4490   0.9804921
## 4587   0.9399353
## 4589   0.8539070
## 4590  -0.8539070
## 4598   0.9485199
## 4599   0.9543662
## 4607   0.8933450
## 4610   0.9403474
## 4638  -0.8615917
## 4639   0.9338748
## 4641   0.9422748
## 4647   0.9429351
## 4651   0.9288129
## 4657   0.8563121
## 4659   0.8552192
## 4660  -0.8797664
## 4707   0.9524194
## 4737   0.8669535
## 4744   0.9242750
## 4748   0.9643125
## 4752   0.9644753
## 4753   0.8997230
## 4760   0.9325304
## 4772  -0.8601764
## 4822   0.8615998
## 4862   0.9057629
## 4958   0.8742613
## 4973   0.8927265
## 4984   0.9044191
## 4985  -0.9355816
## 4987   0.9411901
## 4989   0.9044191
## 4991  -0.9355816
## 5115   0.9429304
## 5116  -0.8746266
## 5154   0.9167603
## 5185   0.8632672
## 5211   0.9527355
## 5213   0.9552863
## 5214  -0.9552863
## 5219   0.8669468
## 5220  -0.9266830
## 5222   0.8777409
## 5223   0.8776421
## 5234   0.9079713
## 5263   0.9053212
## 5265   0.8838026
## 5273  -0.9376310
## 5281   0.9987161
## 5283   0.9474154
## 5284  -0.9198038
## 5289   0.8721006
## 5290  -0.8877447
## 5316   0.9529156
## 5320   0.8703329
## 5328   0.9444970
## 5345   0.8509430
## 5365  -0.8587611
## 5386   0.9932060
## 5388   0.8828843
## 5394   0.8905173
## 5436   0.9176379
## 5437   0.8845826
## 5513   0.8657527
## 5517   0.9996599
## 5518   0.9119251
## 5554  -1.0000000
## 5832   0.8690829
## 5870   0.8669535
## 5888   0.9036400
## 5892   0.9301725
## 5896   0.8739967
## 5897   0.8591751
## 5904   0.8640974
## 5907  -0.8742453
## 5915   0.8531051
## 5947   0.9365492
## 5948  -0.9148891
## 5963  -0.8545301
## 5978   0.9167603
## 6263   0.9358448
## 6271   0.9130218
## 6274   0.8895260
## 6292  -0.8587611
## 6309   0.8823611
## 6311   0.9507821
## 6315   0.8516393
## 6322  -0.8586325
## 6324  -0.8992771
## 6329   0.9349576
## 6366  -0.8712226
## 6374   0.8546523
## 6389  -0.8615917
## 6407  -0.8933945
## 6409  -0.8605906
## 6417   0.8620690
## 6427  -0.8899248
## 6436  -0.9311509
## 6459   0.9485002
## 6461   0.9296235
## 6462  -0.9296235
## 6470   0.8880383
## 6471   0.9277969
## 6482   0.9327395
## 6493   0.9338748
## 6499   0.9053212
## 6510  -0.8933945
## 6513   0.9347546
## 6519   0.9030471
## 6521  -0.9003878
## 6523   0.8669981
## 6529   0.9168102
## 6531   0.9435841
## 6532  -0.9252107
## 6537   0.8555272
## 6540   0.8895127
## 6560   0.8594506
## 6572   0.8545596
## 6579   0.9204578
## 6598   0.9242750
## 6609   0.9036400
## 6620   0.9107344
## 6624   0.9313446
## 6625   0.8944687
## 6632   0.9173707
## 6667   0.9341039
## 6669   0.8573651
## 6670  -0.8573651
## 6678   0.9083823
## 6679   0.8966097
## 6690   0.9061749
## 6701   0.9422748
## 6707   0.8838026
## 6718  -0.8605906
## 6719   0.9347546
## 6727   0.8770747
## 6731   0.8652352
## 6737   0.8896423
## 6739   0.8957062
## 6793   0.8822690
## 6976   0.8795371
## 6995   0.9150332
## 7014   0.9643125
## 7025   0.9301725
## 7032   0.9107344
## 7040   0.9348084
## 7041   0.8734474
## 7048   0.9204382
## 7103   0.8567980
## 7133   0.8823611
## 7224   0.8742613
## 7291   0.9292023
## 7292  -0.8553695
## 7293   0.9028521
## 7294  -0.9028521
## 7302   0.8717703
## 7303   0.9874107
## 7311   0.9476673
## 7314   0.9440636
## 7325   0.9429351
## 7341   0.9507821
## 7343   0.9030471
## 7345   0.8770747
## 7355   0.8853334
## 7361   0.8556579
## 7364  -0.9425605
## 7369   0.9512430
## 7411   0.9264875
## 7430   0.9644753
## 7441   0.8739967
## 7448   0.9313446
## 7452   0.9348084
## 7457   0.9403069
## 7464   0.8823549
## 7467  -0.9109371
## 7476  -0.8613417
## 7499  -0.8723909
## 7501  -0.9073333
## 7502   0.9073333
## 7508   0.8575380
## 7515   0.8698684
## 7534   0.8997230
## 7539  -0.9376310
## 7545   0.8591751
## 7550   0.8620690
## 7551  -0.9003878
## 7552   0.8944687
## 7556   0.8734474
## 7560   0.9403069
## 7569  -0.9423399
## 7571  -0.9779021
## 7572   0.8619107
## 7578   0.8580472
## 7579   0.9149445
## 7580  -0.9233751
## 7718   0.8912864
## 7719   0.9076898
## 7730   0.9511419
## 7741   0.9288129
## 7757   0.8516393
## 7759   0.8669981
## 7761   0.8652352
## 7767   0.8853334
## 7780  -0.8539563
## 7816   0.8904250
## 7826   0.8505623
## 7841   0.8713148
## 8038   0.8861449
## 8046   0.9511375
## 8055   0.9057629
## 8197   0.8524422
## 8243   0.9641935
## 8262   0.9325304
## 8273   0.8640974
## 8280   0.9173707
## 8284   0.9204382
## 8288   0.8823549
## 8331   0.9582330
## 8333   0.9633340
## 8334  -0.9633340
## 8339   0.8641072
## 8340  -0.9237960
## 8342   0.8912617
## 8343   0.8934819
## 8354   0.9211390
## 8365   0.8563121
## 8371   0.9987161
## 8383   0.9168102
## 8385   0.8896423
## 8391   0.8556579
## 8393  -0.9423399
## 8403   0.9558808
## 8404  -0.9333600
## 8409   0.8808230
## 8410  -0.8999025
## 8436   0.9708343
## 8437  -0.8845332
## 8438   0.8845332
## 8440   0.8528933
## 8448   0.9400409
## 8476   0.9932060
## 8485  -0.8586325
## 8508   0.9209462
## 8513  -0.8576602
## 8514   0.9253651
## 8539   0.9113811
## 8541   0.9428282
## 8542  -0.9428282
## 8548  -0.8563477
## 8550   0.8897374
## 8551   0.8641769
## 8562   0.9058417
## 8573   0.8552192
## 8579   0.9474154
## 8585  -0.8742453
## 8590  -0.8899248
## 8591   0.9435841
## 8593   0.8957062
## 8600  -0.9109371
## 8601  -0.9779021
## 8609   0.9558808
## 8612  -0.9119276
## 8618  -0.8885370
## 8619  -0.8868385
## 8620   0.9108639
## 8643  -0.9307463
## 8644   0.9262470
## 8645  -0.9880744
## 8646   0.9880744
## 8651  -0.8920524
## 8652   0.8985082
## 8654  -0.8652567
## 8655  -0.9599913
## 8656   0.8516399
## 8663  -0.8574849
## 8666  -0.9613752
## 8677  -0.8797664
## 8683  -0.9198038
## 8684   0.8828843
## 8693  -0.8992771
## 8695  -0.9252107
## 8703  -0.9425605
## 8705   0.8619107
## 8707  -0.8539563
## 8713  -0.9333600
## 8714   0.9209462
## 8715  -0.9119276
## 8721  -0.9543147
## 8722   0.9446555
## 8784   0.8927265
## 8815   0.8524422
## 8833  -0.8502340
## 8839  -0.8502340
## 9065   0.8535079
## 9091   0.8641738
## 9163   0.9046146
## 9165   0.9361662
## 9166  -0.9361662
## 9171   0.9323863
## 9172  -0.8640713
## 9175   0.9499108
## 9183   0.8813586
## 9186   0.9089071
## 9202   0.8632672
## 9203   0.8721006
## 9213   0.9349576
## 9215   0.8555272
## 9223   0.9512430
## 9233   0.8808230
## 9234  -0.8576602
## 9236  -0.9543147
## 9242  -0.8662924
## 9268   0.9147621
## 9269  -0.9532363
## 9270   0.9532363
## 9275  -0.8799688
## 9276   0.9320170
## 9290  -0.8990171
## 9307  -0.8877447
## 9308   0.8905173
## 9329   0.8580472
## 9337  -0.8999025
## 9338   0.9253651
## 9339  -0.8885370
## 9340   0.9446555
## 9345  -0.8662924
## 9368   0.9105507
## 9417   0.8531051
## 9433   0.9149445
## 9443  -0.8868385
## 9510  -0.8601764
## 9526  -0.9311509
## 9527   0.8895127
## 9536  -0.8613417
## 9537  -0.9233751
## 9547   0.9108639
## 9881   0.8916220
## 9913   0.8759197
## 9928   0.9044191
## 9977  -0.9170017
## 9979   0.9741924
## 9981   1.0000000
## 9983  -0.9170017
## 9988   0.8577641
## 10032 -0.9355816
## 10069 -0.8502340
## 10080 -0.9170017
## 10083 -0.9833717
## 10085 -0.9170017
## 10087  1.0000000
## 10190  1.0000000
## 10192  0.9520230
## 10240  0.9411901
## 10288  0.9741924
## 10289 -0.9833717
## 10293  0.9741924
## 10295 -0.9833717
## 10401  0.8916220
## 10433  0.8759197
## 10448  0.9044191
## 10496  1.0000000
## 10497 -0.9170017
## 10499  0.9741924
## 10503 -0.9170017
## 10602  1.0000000
## 10608  0.9520230
## 10612  0.8577641
## 10656 -0.9355816
## 10693 -0.8502340
## 10704 -0.9170017
## 10705  1.0000000
## 10707 -0.9833717
## 10709 -0.9170017
## 10810  0.9520230
## 10814  0.9520230
match_data_special
## # A tibble: 52,847 × 115
##    fixture_id halftime current_time        half_start_datetime
##         <dbl> <chr>    <dttm>              <dttm>             
##  1   19134453 1st-half 2024-08-16 19:01:19 2024-08-16 19:00:31
##  2   19134453 1st-half 2024-08-16 19:02:18 2024-08-16 19:00:31
##  3   19134453 1st-half 2024-08-16 19:03:19 2024-08-16 19:00:31
##  4   19134453 1st-half 2024-08-16 19:04:18 2024-08-16 19:00:31
##  5   19134453 1st-half 2024-08-16 19:05:19 2024-08-16 19:00:31
##  6   19134453 1st-half 2024-08-16 19:06:18 2024-08-16 19:00:31
##  7   19134453 1st-half 2024-08-16 19:07:18 2024-08-16 19:00:31
##  8   19134453 1st-half 2024-08-16 19:08:19 2024-08-16 19:00:31
##  9   19134453 1st-half 2024-08-16 19:09:19 2024-08-16 19:00:31
## 10   19134453 1st-half 2024-08-16 19:10:18 2024-08-16 19:00:31
## # ℹ 52,837 more rows
## # ℹ 111 more variables: match_start_datetime <dttm>, minute <dbl>,
## #   second <dbl>, latest_bookmaker_update <dttm>, suspended <lgl>,
## #   stopped <lgl>, `1` <dbl>, `2` <dbl>, X <dbl>, name <chr>, ticking <lgl>,
## #   `Accurate Crosses - away` <dbl>, `Accurate Crosses - home` <dbl>,
## #   `Assists - away` <dbl>, `Assists - home` <dbl>, `Attacks - away` <dbl>,
## #   `Attacks - home` <dbl>, `Ball Possession % - away` <dbl>, …
library(dplyr)
library(rpart)
library(rpart.plot)
library(caret)
## Loading required package: lattice
library(ggplot2)

numerical_columns <- match_data_special %>%
  select(where(is.numeric)) %>%
  colnames()

first_half_data <- match_data_special %>% filter(halftime == "1st-half")
second_half_data <- match_data_special %>% filter(halftime == "2nd-half")

train_decision_tree <- function(data, tree_columns, target, maxdepth = 12, minsplit = 4, cp = 0.004) {
  X <- data %>% select(all_of(tree_columns))
  y <- data[[target]]
  
  set.seed(42)
  train_indices <- createDataPartition(y, p = 0.8, list = FALSE)
  X_train <- X[train_indices, ]
  X_test <- X[-train_indices, ]
  y_train <- y[train_indices]
  y_test <- y[-train_indices]
  
  decision_tree <- rpart(y_train ~ ., 
                         data = X_train, 
                         method = "class", 
                         control = rpart.control(maxdepth = maxdepth, 
                                                 minsplit = minsplit, 
                                                 cp = cp))
  
  predictions <- predict(decision_tree, newdata = X_test, type = "class")
  confusion_matrix <- table(Predicted = predictions, Actual = y_test)
  accuracy <- sum(diag(confusion_matrix)) / sum(confusion_matrix)
  
  feature_importances <- decision_tree$variable.importance
  
  list(
    model = decision_tree,
    accuracy = accuracy,
    confusion_matrix = confusion_matrix,
    feature_importances = feature_importances
  )
}

tree_columns <- setdiff(numerical_columns, c("P_home", "P_away", "P_draw", 
                                             "P_home_norm", "P_away_norm", 
                                             "P_draw_norm", "name", "Total_odds", 
                                             "final_score", "P_home_minus_P_away", 
                                             "total_prob", "fixture_id", "halftime", "Assists - home", "Assists - away", "second", "Ball Possession % - away", "Substitutions - home"))

first_half_result <- train_decision_tree(first_half_data, tree_columns, "result")
print("First Half Accuracy:")
## [1] "First Half Accuracy:"
print(round(first_half_result$accuracy, 2))
## [1] 0.62
print("First Half Confusion Matrix:")
## [1] "First Half Confusion Matrix:"
print(first_half_result$confusion_matrix)
##          Actual
## Predicted    1    2    X
##         1 1668  230  438
##         2  369 1116  367
##         X  382  283  634
cat("Feature Importance (First Half):\n")
## Feature Importance (First Half):
importance_df_first_half <- data.frame(
  Feature = names(first_half_result$feature_importances),
  Importance = first_half_result$feature_importances
)
importance_df_first_half <- importance_df_first_half[order(-importance_df_first_half$Importance), ]
print(importance_df_first_half)
##                                                                 Feature
## 2                                                                     2
## 1                                                                     1
## X                                                                     X
## Goals - away                                               Goals - away
## Ball Possession % - home                       Ball Possession % - home
## Successful Passes Percentage - away Successful Passes Percentage - away
## Goals - home                                               Goals - home
## Shots On Target - away                           Shots On Target - away
## Shots On Target - home                           Shots On Target - home
## Shots Total - home                                   Shots Total - home
## Headers - away                                           Headers - away
## Successful Headers - away                     Successful Headers - away
## Shots Insidebox - home                           Shots Insidebox - home
## Saves - away                                               Saves - away
## Passes - away                                             Passes - away
## Corners - home                                           Corners - home
## Tackles - home                                           Tackles - home
## minute                                                           minute
## Interceptions - home                               Interceptions - home
## Successful Passes - away                       Successful Passes - away
## Challenges - home                                     Challenges - home
## Successful Interceptions - home         Successful Interceptions - home
## Successful Dribbles - home                   Successful Dribbles - home
## Accurate Crosses - away                         Accurate Crosses - away
## Key Passes - home                                     Key Passes - home
## Passes - home                                             Passes - home
## Attacks - away                                           Attacks - away
## Shots Total - away                                   Shots Total - away
## Ball Safe - away                                       Ball Safe - away
## Total Crosses - away                               Total Crosses - away
## Headers - home                                           Headers - home
## Successful Headers - home                     Successful Headers - home
## Total Crosses - home                               Total Crosses - home
## Ball Safe - home                                       Ball Safe - home
## Accurate Crosses - home                         Accurate Crosses - home
## Attacks - home                                           Attacks - home
## Dangerous Attacks - home                       Dangerous Attacks - home
## Successful Passes - home                       Successful Passes - home
## Shots Insidebox - away                           Shots Insidebox - away
## Shots Blocked - away                               Shots Blocked - away
## Key Passes - away                                     Key Passes - away
## Goal Attempts - home                               Goal Attempts - home
## Goal Attempts - away                               Goal Attempts - away
## Dribble Attempts - home                         Dribble Attempts - home
## Throwins - away                                         Throwins - away
## Dangerous Attacks - away                       Dangerous Attacks - away
## Dribble Attempts - away                         Dribble Attempts - away
## Successful Passes Percentage - home Successful Passes Percentage - home
## Injuries - home                                         Injuries - home
## Throwins - home                                         Throwins - home
## Injuries - away                                         Injuries - away
## Shots Outsidebox - home                         Shots Outsidebox - home
##                                      Importance
## 2                                   2684.165778
## 1                                   2410.840000
## X                                    776.977267
## Goals - away                         493.810993
## Ball Possession % - home             340.865975
## Successful Passes Percentage - away  273.639747
## Goals - home                         235.883354
## Shots On Target - away               193.649168
## Shots On Target - home               188.144787
## Shots Total - home                   162.734170
## Headers - away                        78.941232
## Successful Headers - away             56.281479
## Shots Insidebox - home                53.722623
## Saves - away                          52.583416
## Passes - away                         46.530768
## Corners - home                        45.162033
## Tackles - home                        45.120282
## minute                                42.985758
## Interceptions - home                  42.641307
## Successful Passes - away              40.203392
## Challenges - home                     40.132995
## Successful Interceptions - home       39.068780
## Successful Dribbles - home            38.992453
## Accurate Crosses - away               37.664640
## Key Passes - home                     35.754829
## Passes - home                         34.953696
## Attacks - away                        34.414704
## Shots Total - away                    32.805566
## Ball Safe - away                      32.748627
## Total Crosses - away                  30.526128
## Headers - home                        28.662979
## Successful Headers - home             28.240373
## Total Crosses - home                  27.357770
## Ball Safe - home                      24.984692
## Accurate Crosses - home               24.952862
## Attacks - home                        24.424101
## Dangerous Attacks - home              24.423344
## Successful Passes - home              21.078971
## Shots Insidebox - away                18.636558
## Shots Blocked - away                  16.992156
## Key Passes - away                     16.209107
## Goal Attempts - home                  15.119241
## Goal Attempts - away                  13.718231
## Dribble Attempts - home               12.921918
## Throwins - away                       10.402764
## Dangerous Attacks - away               7.839017
## Dribble Attempts - away                7.086955
## Successful Passes Percentage - home    6.020380
## Injuries - home                        5.428917
## Throwins - home                        4.239466
## Injuries - away                        2.977148
## Shots Outsidebox - home                2.947104
print("Decision Tree Details (First Half):")
## [1] "Decision Tree Details (First Half):"
printcp(first_half_result$model)  # Hangi parametrelere göre kırıldığını gösterir
## 
## Classification tree:
## rpart(formula = y_train ~ ., data = X_train, method = "class", 
##     control = rpart.control(maxdepth = maxdepth, minsplit = minsplit, 
##         cp = cp))
## 
## Variables actually used in tree construction:
##  [1] 1                                   2                                  
##  [3] Accurate Crosses - away             Ball Possession % - home           
##  [5] Ball Safe - away                    Corners - home                     
##  [7] Headers - away                      Interceptions - home               
##  [9] Shots On Target - home              Successful Dribbles - home         
## [11] Successful Passes Percentage - away Tackles - home                     
## [13] X                                  
## 
## Root node error: 12275/21954 = 0.55912
## 
## n= 21954 
## 
##         CP nsplit rel error  xerror      xstd
## 1 0.238126      0   1.00000 1.00000 0.0059930
## 2 0.004372      1   0.76187 0.76660 0.0059736
## 3 0.004334     11   0.70623 0.72049 0.0059203
## 4 0.004000     18   0.67022 0.68424 0.0058666
summary(first_half_result$model)  # Karar ağacının detaylı yapısını yazdırır
## Call:
## rpart(formula = y_train ~ ., data = X_train, method = "class", 
##     control = rpart.control(maxdepth = maxdepth, minsplit = minsplit, 
##         cp = cp))
##   n= 21954 
## 
##            CP nsplit rel error    xerror        xstd
## 1 0.238126273      0 1.0000000 1.0000000 0.005993047
## 2 0.004372030      1 0.7618737 0.7665988 0.005973578
## 3 0.004334012     11 0.7062322 0.7204888 0.005920350
## 4 0.004000000     18 0.6702240 0.6842363 0.005866583
## 
## Variable importance
##                                   2                                   1 
##                                  30                                  27 
##                                   X                        Goals - away 
##                                   9                                   6 
##            Ball Possession % - home Successful Passes Percentage - away 
##                                   4                                   3 
##                        Goals - home              Shots On Target - away 
##                                   3                                   2 
##              Shots On Target - home                  Shots Total - home 
##                                   2                                   2 
##                      Headers - away           Successful Headers - away 
##                                   1                                   1 
##              Shots Insidebox - home                        Saves - away 
##                                   1                                   1 
##                       Passes - away                      Corners - home 
##                                   1                                   1 
##                      Tackles - home 
##                                   1 
## 
## Node number 1: 21954 observations,    complexity param=0.2381263
##   predicted class=1  expected loss=0.5591236  P(node) =1
##     class counts:  9679  6517  5758
##    probabilities: 0.441 0.297 0.262 
##   left son=2 (14100 obs) right son=3 (7854 obs)
##   Primary splits:
##       2            < 2.61  to the right, improve=1631.0870, (0 missing)
##       1            < 1.925 to the left,  improve=1621.5490, (0 missing)
##       Goals - home < 0.5   to the right, improve= 675.1070, (0 missing)
##       X            < 4.875 to the right, improve= 384.1266, (0 missing)
##       Goals - away < 0.5   to the left,  improve= 318.0188, (0 missing)
##   Surrogate splits:
##       1                                   < 2.775 to the left,  agree=0.957, adj=0.880, (0 split)
##       Goals - away                        < 0.5   to the left,  agree=0.738, adj=0.268, (0 split)
##       Ball Possession % - home            < 38.5  to the right, agree=0.697, adj=0.154, (0 split)
##       Successful Passes Percentage - away < 86.5  to the left,  agree=0.686, adj=0.122, (0 split)
##       Shots On Target - away              < 1.5   to the left,  agree=0.682, adj=0.111, (0 split)
## 
## Node number 2: 14100 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.418156  P(node) =0.642252
##     class counts:  8204  2119  3777
##    probabilities: 0.582 0.150 0.268 
##   left son=4 (4959 obs) right son=5 (9141 obs)
##   Primary splits:
##       2                      < 6.75  to the right, improve=676.0262, (0 missing)
##       1                      < 1.42  to the left,  improve=614.7590, (0 missing)
##       X                      < 4.415 to the right, improve=576.4455, (0 missing)
##       Goals - home           < 0.5   to the right, improve=237.6263, (0 missing)
##       Shots On Target - home < 1.5   to the right, improve=105.3575, (0 missing)
##   Surrogate splits:
##       1                      < 1.515 to the left,  agree=0.960, adj=0.886, (0 split)
##       X                      < 4.265 to the right, agree=0.913, adj=0.753, (0 split)
##       Goals - home           < 0.5   to the right, agree=0.770, adj=0.346, (0 split)
##       Shots On Target - home < 1.5   to the right, agree=0.712, adj=0.180, (0 split)
##       Shots Total - home     < 5.5   to the right, agree=0.700, adj=0.146, (0 split)
## 
## Node number 3: 7854 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.4400306  P(node) =0.357748
##     class counts:  1475  4398  1981
##    probabilities: 0.188 0.560 0.252 
##   left son=6 (6000 obs) right son=7 (1854 obs)
##   Primary splits:
##       2            < 1.42  to the right, improve=217.70540, (0 missing)
##       X            < 4.265 to the left,  improve=211.15870, (0 missing)
##       1            < 7.25  to the left,  improve=202.93600, (0 missing)
##       Goals - away < 1.5   to the right, improve= 60.46323, (0 missing)
##       Goals - home < 0.5   to the right, improve= 51.34057, (0 missing)
##   Surrogate splits:
##       1                      < 7.75  to the left,  agree=0.972, adj=0.882, (0 split)
##       X                      < 4.415 to the left,  agree=0.969, adj=0.868, (0 split)
##       Goals - away           < 1.5   to the left,  agree=0.826, adj=0.262, (0 split)
##       Shots Total - away     < 7.5   to the left,  agree=0.780, adj=0.069, (0 split)
##       Shots On Target - away < 3.5   to the left,  agree=0.778, adj=0.061, (0 split)
## 
## Node number 4: 4959 observations
##   predicted class=1  expected loss=0.1774551  P(node) =0.2258814
##     class counts:  4079   283   597
##    probabilities: 0.823 0.057 0.120 
## 
## Node number 5: 9141 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.5487365  P(node) =0.4163706
##     class counts:  4125  1836  3180
##    probabilities: 0.451 0.201 0.348 
##   left son=10 (3459 obs) right son=11 (5682 obs)
##   Primary splits:
##       1            < 1.925 to the left,  improve=80.40346, (0 missing)
##       2            < 3.775 to the right, improve=64.04850, (0 missing)
##       X            < 3.45  to the right, improve=49.52885, (0 missing)
##       Goals - away < 0.5   to the right, improve=39.20633, (0 missing)
##       Fouls - home < 3.5   to the left,  improve=35.68039, (0 missing)
##   Surrogate splits:
##       2                                   < 4.1   to the right, agree=0.902, adj=0.742, (0 split)
##       X                                   < 3.45  to the right, agree=0.823, adj=0.533, (0 split)
##       Ball Possession % - home            < 64.5  to the right, agree=0.670, adj=0.128, (0 split)
##       Successful Passes Percentage - home < 89.5  to the right, agree=0.650, adj=0.075, (0 split)
##       Attacks - away                      < 7.5   to the left,  agree=0.636, adj=0.039, (0 split)
## 
## Node number 6: 6000 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.5155  P(node) =0.2732987
##     class counts:  1373  2907  1720
##    probabilities: 0.229 0.484 0.287 
##   left son=12 (2248 obs) right son=13 (3752 obs)
##   Primary splits:
##       1                          < 3.55  to the left,  improve=77.02462, (0 missing)
##       Successful Dribbles - home < 1.5   to the left,  improve=66.21267, (0 missing)
##       Shots On Target - home     < 0.5   to the left,  improve=55.00707, (0 missing)
##       Throwins - away            < 8.5   to the left,  improve=51.07116, (0 missing)
##       Goals - away               < 0.5   to the right, improve=46.61765, (0 missing)
##   Surrogate splits:
##       2                        < 2.15  to the right, agree=0.863, adj=0.635, (0 split)
##       Successful Passes - away < 41.5  to the left,  agree=0.652, adj=0.072, (0 split)
##       Ball Possession % - home < 64.5  to the right, agree=0.651, adj=0.069, (0 split)
##       Passes - away            < 40.5  to the left,  agree=0.649, adj=0.062, (0 split)
##       Attacks - away           < 11.5  to the left,  agree=0.646, adj=0.056, (0 split)
## 
## Node number 7: 1854 observations
##   predicted class=2  expected loss=0.1957929  P(node) =0.0844493
##     class counts:   102  1491   261
##    probabilities: 0.055 0.804 0.141 
## 
## Node number 10: 3459 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.4518647  P(node) =0.1575567
##     class counts:  1896   480  1083
##    probabilities: 0.548 0.139 0.313 
##   left son=20 (446 obs) right son=21 (3013 obs)
##   Primary splits:
##       Headers - away                      < 8.5   to the right, improve=41.78433, (0 missing)
##       Successful Passes Percentage - home < 82.5  to the left,  improve=40.26706, (0 missing)
##       Successful Headers - away           < 5.5   to the right, improve=35.74819, (0 missing)
##       Substitutions - away                < 0.5   to the left,  improve=31.74343, (0 missing)
##       Interceptions - home                < 1.5   to the left,  improve=25.07350, (0 missing)
##   Surrogate splits:
##       Successful Headers - away < 4.5   to the right, agree=0.950, adj=0.612, (0 split)
##       Headers - home            < 9.5   to the right, agree=0.912, adj=0.314, (0 split)
##       Successful Headers - home < 5.5   to the right, agree=0.910, adj=0.298, (0 split)
##       Total Crosses - away      < 8.5   to the right, agree=0.890, adj=0.143, (0 split)
##       Throwins - away           < 9.5   to the right, agree=0.885, adj=0.108, (0 split)
## 
## Node number 11: 5682 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.6077086  P(node) =0.2588139
##     class counts:  2229  1356  2097
##    probabilities: 0.392 0.239 0.369 
##   left son=22 (2585 obs) right son=23 (3097 obs)
##   Primary splits:
##       2                    < 3.225 to the left,  improve=41.66928, (0 missing)
##       Interceptions - home < 6.5   to the left,  improve=34.53119, (0 missing)
##       Challenges - home    < 6.5   to the left,  improve=33.62570, (0 missing)
##       Total Crosses - away < 5.5   to the left,  improve=33.02818, (0 missing)
##       Goal Attempts - home < 2.5   to the right, improve=32.36730, (0 missing)
##   Surrogate splits:
##       1                                   < 2.325 to the right, agree=0.755, adj=0.461, (0 split)
##       Successful Passes Percentage - away < 85.5  to the right, agree=0.616, adj=0.156, (0 split)
##       Accurate Crosses - home             < 0.5   to the left,  agree=0.601, adj=0.124, (0 split)
##       Attacks - home                      < 22.5  to the left,  agree=0.600, adj=0.120, (0 split)
##       Throwins - home                     < 4.5   to the left,  agree=0.591, adj=0.102, (0 split)
## 
## Node number 12: 2248 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.6107651  P(node) =0.1023959
##     class counts:   762   875   611
##    probabilities: 0.339 0.389 0.272 
##   left son=24 (2076 obs) right son=25 (172 obs)
##   Primary splits:
##       Successful Dribbles - home          < 3.5   to the left,  improve=38.99245, (0 missing)
##       Corners - home                      < 2.5   to the right, improve=33.21658, (0 missing)
##       Successful Passes Percentage - away < 86.5  to the left,  improve=27.99142, (0 missing)
##       Successful Passes Percentage - home < 80.5  to the right, improve=26.08567, (0 missing)
##       X                                   < 3.55  to the right, improve=25.86873, (0 missing)
##   Surrogate splits:
##       Dribble Attempts - home  < 7.5   to the left,  agree=0.949, adj=0.331, (0 split)
##       Attacks - home           < 55.5  to the left,  agree=0.932, adj=0.116, (0 split)
##       Shots Outsidebox - home  < 4.5   to the left,  agree=0.929, adj=0.076, (0 split)
##       Dangerous Attacks - home < 33.5  to the left,  agree=0.928, adj=0.064, (0 split)
##       Saves - away             < 2.5   to the left,  agree=0.928, adj=0.058, (0 split)
## 
## Node number 13: 3752 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.4584222  P(node) =0.1709028
##     class counts:   611  2032  1109
##    probabilities: 0.163 0.542 0.296 
##   left son=26 (1925 obs) right son=27 (1827 obs)
##   Primary splits:
##       Shots On Target - home     < 0.5   to the left,  improve=64.10606, (0 missing)
##       Successful Dribbles - home < 1.5   to the right, improve=48.32103, (0 missing)
##       Shots On Target - away     < 2.5   to the right, improve=47.72801, (0 missing)
##       Passes - away              < 235.5 to the left,  improve=46.62827, (0 missing)
##       Throwins - away            < 9.5   to the left,  improve=46.60922, (0 missing)
##   Surrogate splits:
##       Saves - away           < 0.5   to the left,  agree=0.895, adj=0.785, (0 split)
##       Shots Total - home     < 1.5   to the left,  agree=0.820, adj=0.630, (0 split)
##       Key Passes - home      < 1.5   to the left,  agree=0.785, adj=0.558, (0 split)
##       Shots Insidebox - home < 1.5   to the left,  agree=0.771, adj=0.530, (0 split)
##       minute                 < 23.5  to the left,  agree=0.721, adj=0.428, (0 split)
## 
## Node number 20: 446 observations
##   predicted class=1  expected loss=0.2511211  P(node) =0.0203152
##     class counts:   334    63    49
##    probabilities: 0.749 0.141 0.110 
## 
## Node number 21: 3013 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.4815798  P(node) =0.1372415
##     class counts:  1562   417  1034
##    probabilities: 0.518 0.138 0.343 
##   left son=42 (2027 obs) right son=43 (986 obs)
##   Primary splits:
##       Interceptions - home            < 1.5   to the left,  improve=42.64131, (0 missing)
##       Challenges - away               < 5.5   to the left,  improve=42.00108, (0 missing)
##       Challenges - home               < 1.5   to the left,  improve=41.66064, (0 missing)
##       Interceptions - away            < 5.5   to the left,  improve=40.81529, (0 missing)
##       Successful Interceptions - home < 4.5   to the left,  improve=37.80986, (0 missing)
##   Surrogate splits:
##       Challenges - home               < 1.5   to the left,  agree=0.981, adj=0.941, (0 split)
##       Successful Interceptions - home < 1.5   to the left,  agree=0.799, adj=0.384, (0 split)
##       Passes - home                   < 114.5 to the left,  agree=0.789, adj=0.356, (0 split)
##       Attacks - home                  < 29.5  to the left,  agree=0.787, adj=0.349, (0 split)
##       Passes - away                   < 92.5  to the left,  agree=0.783, adj=0.338, (0 split)
## 
## Node number 22: 2585 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.5907157  P(node) =0.1177462
##     class counts:  1058   762   765
##    probabilities: 0.409 0.295 0.296 
##   left son=44 (1248 obs) right son=45 (1337 obs)
##   Primary splits:
##       Corners - home       < 0.5   to the right, improve=45.16203, (0 missing)
##       Long Passes - home   < 3.5   to the right, improve=43.58502, (0 missing)
##       Headers - away       < 2.5   to the right, improve=37.26344, (0 missing)
##       Total Crosses - home < 4.5   to the left,  improve=33.17211, (0 missing)
##       Corners - away       < 2.5   to the right, improve=31.75284, (0 missing)
##   Surrogate splits:
##       Total Crosses - home     < 2.5   to the right, agree=0.810, adj=0.606, (0 split)
##       Shots Total - home       < 1.5   to the right, agree=0.767, adj=0.518, (0 split)
##       Dangerous Attacks - home < 7.5   to the right, agree=0.752, adj=0.486, (0 split)
##       Accurate Crosses - home  < 0.5   to the right, agree=0.729, adj=0.438, (0 split)
##       Shots Insidebox - home   < 1.5   to the right, agree=0.728, adj=0.437, (0 split)
## 
## Node number 23: 3097 observations,    complexity param=0.00437203
##   predicted class=X  expected loss=0.5699064  P(node) =0.1410677
##     class counts:  1171   594  1332
##    probabilities: 0.378 0.192 0.430 
##   left son=46 (561 obs) right son=47 (2536 obs)
##   Primary splits:
##       Ball Safe - away       < 12.5  to the right, improve=32.74863, (0 missing)
##       Fouls - away           < 6.5   to the left,  improve=29.66562, (0 missing)
##       Counter Attacks - home < 2.5   to the left,  improve=25.11336, (0 missing)
##       Ball Safe - home       < 7.5   to the right, improve=24.04746, (0 missing)
##       Headers - home         < 1.5   to the left,  improve=23.04691, (0 missing)
##   Surrogate splits:
##       Ball Safe - home     < 15.5  to the right, agree=0.957, adj=0.763, (0 split)
##       Goal Attempts - home < 0.5   to the right, agree=0.902, adj=0.462, (0 split)
##       Goal Attempts - away < 1.5   to the right, agree=0.895, adj=0.419, (0 split)
##       Injuries - home      < 0.5   to the right, agree=0.849, adj=0.166, (0 split)
##       Injuries - away      < 0.5   to the right, agree=0.835, adj=0.091, (0 split)
## 
## Node number 24: 2076 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.5833333  P(node) =0.09456136
##     class counts:   701   865   510
##    probabilities: 0.338 0.417 0.246 
##   left son=48 (1403 obs) right son=49 (673 obs)
##   Primary splits:
##       Successful Passes Percentage - away < 86.5  to the left,  improve=40.17345, (0 missing)
##       Throwins - home                     < 8.5   to the left,  improve=30.17830, (0 missing)
##       Shots Total - away                  < 6.5   to the right, improve=25.14732, (0 missing)
##       Corners - home                      < 2.5   to the right, improve=24.26579, (0 missing)
##       X                                   < 3.55  to the right, improve=22.53667, (0 missing)
##   Surrogate splits:
##       Ball Possession % - home < 32.5  to the right, agree=0.728, adj=0.160, (0 split)
##       Successful Passes - away < 113.5 to the left,  agree=0.723, adj=0.146, (0 split)
##       Passes - away            < 156.5 to the left,  agree=0.703, adj=0.083, (0 split)
##       Goals - home             < 0.5   to the left,  agree=0.693, adj=0.052, (0 split)
##       Shots On Target - home   < 2.5   to the left,  agree=0.692, adj=0.051, (0 split)
## 
## Node number 25: 172 observations
##   predicted class=X  expected loss=0.4127907  P(node) =0.007834563
##     class counts:    61    10   101
##    probabilities: 0.355 0.058 0.587 
## 
## Node number 26: 1925 observations
##   predicted class=2  expected loss=0.3844156  P(node) =0.08768334
##     class counts:   364  1185   376
##    probabilities: 0.189 0.616 0.195 
## 
## Node number 27: 1827 observations,    complexity param=0.004334012
##   predicted class=2  expected loss=0.5363985  P(node) =0.08321946
##     class counts:   247   847   733
##    probabilities: 0.135 0.464 0.401 
##   left son=54 (1152 obs) right son=55 (675 obs)
##   Primary splits:
##       Ball Possession % - home            < 41.5  to the right, improve=67.75384, (0 missing)
##       Successful Passes Percentage - home < 72.5  to the right, improve=51.82687, (0 missing)
##       Successful Passes Percentage - away < 83.5  to the left,  improve=50.34655, (0 missing)
##       Fouls - away                        < 1.5   to the right, improve=49.70352, (0 missing)
##       Long Passes - home                  < 3.5   to the right, improve=31.55224, (0 missing)
##   Surrogate splits:
##       Successful Passes Percentage - away < 86.5  to the left,  agree=0.779, adj=0.401, (0 split)
##       Successful Passes - away            < 192.5 to the left,  agree=0.770, adj=0.376, (0 split)
##       Passes - away                       < 217.5 to the left,  agree=0.761, adj=0.354, (0 split)
##       Successful Passes - home            < 83.5  to the right, agree=0.745, adj=0.311, (0 split)
##       Passes - home                       < 104.5 to the right, agree=0.738, adj=0.292, (0 split)
## 
## Node number 42: 2027 observations
##   predicted class=1  expected loss=0.4158855  P(node) =0.09232942
##     class counts:  1184   243   600
##    probabilities: 0.584 0.120 0.296 
## 
## Node number 43: 986 observations,    complexity param=0.00437203
##   predicted class=X  expected loss=0.5598377  P(node) =0.04491209
##     class counts:   378   174   434
##    probabilities: 0.383 0.176 0.440 
##   left son=86 (481 obs) right son=87 (505 obs)
##   Primary splits:
##       Accurate Crosses - away             < 0.5   to the left,  improve=37.66464, (0 missing)
##       Successful Passes Percentage - home < 80.5  to the left,  improve=35.59980, (0 missing)
##       Interceptions - away                < 5.5   to the left,  improve=34.58036, (0 missing)
##       Challenges - away                   < 5.5   to the left,  improve=33.92129, (0 missing)
##       Goal Attempts - home                < 3.5   to the left,  improve=32.86290, (0 missing)
##   Surrogate splits:
##       Shots Insidebox - away < 0.5   to the left,  agree=0.754, adj=0.495, (0 split)
##       Shots Total - away     < 1.5   to the left,  agree=0.742, adj=0.472, (0 split)
##       Total Crosses - away   < 2.5   to the left,  agree=0.733, adj=0.453, (0 split)
##       Shots Blocked - away   < 0.5   to the left,  agree=0.732, adj=0.451, (0 split)
##       Key Passes - away      < 0.5   to the left,  agree=0.722, adj=0.430, (0 split)
## 
## Node number 44: 1248 observations,    complexity param=0.00437203
##   predicted class=2  expected loss=0.59375  P(node) =0.05684613
##     class counts:   449   507   292
##    probabilities: 0.360 0.406 0.234 
##   left son=88 (191 obs) right son=89 (1057 obs)
##   Primary splits:
##       Tackles - home          < 7.5   to the right, improve=45.12028, (0 missing)
##       Dribble Attempts - away < 3.5   to the right, improve=32.52919, (0 missing)
##       Goals - away            < 0.5   to the right, improve=28.48359, (0 missing)
##       Challenges - home       < 7.5   to the left,  improve=28.15158, (0 missing)
##       Shots On Target - away  < 0.5   to the right, improve=27.36109, (0 missing)
##   Surrogate splits:
##       Successful Interceptions - home < 7.5   to the right, agree=0.924, adj=0.503, (0 split)
##       Dribble Attempts - away         < 10.5  to the right, agree=0.871, adj=0.157, (0 split)
##       Throwins - away                 < 11.5  to the right, agree=0.867, adj=0.131, (0 split)
##       Attacks - away                  < 56.5  to the right, agree=0.858, adj=0.073, (0 split)
##       Successful Passes - away        < 251   to the right, agree=0.858, adj=0.073, (0 split)
## 
## Node number 45: 1337 observations,    complexity param=0.00437203
##   predicted class=1  expected loss=0.5445026  P(node) =0.06090006
##     class counts:   609   255   473
##    probabilities: 0.455 0.191 0.354 
##   left son=90 (478 obs) right son=91 (859 obs)
##   Primary splits:
##       Headers - away          < 2.5   to the right, improve=37.15690, (0 missing)
##       Shots Off Target - away < 1.5   to the right, improve=36.09413, (0 missing)
##       Long Passes - home      < 4.5   to the right, improve=32.79158, (0 missing)
##       Shots Total - away      < 2.5   to the right, improve=23.74699, (0 missing)
##       Key Passes - away       < 3.5   to the right, improve=23.02399, (0 missing)
##   Surrogate splits:
##       Successful Headers - away < 1.5   to the right, agree=0.938, adj=0.826, (0 split)
##       Attacks - away            < 24.5  to the right, agree=0.801, adj=0.444, (0 split)
##       Successful Headers - home < 1.5   to the right, agree=0.794, adj=0.425, (0 split)
##       minute                    < 20.5  to the right, agree=0.792, adj=0.418, (0 split)
##       Headers - home            < 2.5   to the right, agree=0.792, adj=0.418, (0 split)
## 
## Node number 46: 561 observations
##   predicted class=1  expected loss=0.4474153  P(node) =0.02555343
##     class counts:   310    77   174
##    probabilities: 0.553 0.137 0.310 
## 
## Node number 47: 2536 observations
##   predicted class=X  expected loss=0.5433754  P(node) =0.1155143
##     class counts:   861   517  1158
##    probabilities: 0.340 0.204 0.457 
## 
## Node number 48: 1403 observations
##   predicted class=2  expected loss=0.5067712  P(node) =0.06390635
##     class counts:   442   692   269
##    probabilities: 0.315 0.493 0.192 
## 
## Node number 49: 673 observations,    complexity param=0.004334012
##   predicted class=1  expected loss=0.615156  P(node) =0.03065501
##     class counts:   259   173   241
##    probabilities: 0.385 0.257 0.358 
##   left son=98 (285 obs) right son=99 (388 obs)
##   Primary splits:
##       X                               < 3.325 to the right, improve=36.03419, (0 missing)
##       Accurate Crosses - home         < 0.5   to the left,  improve=28.52197, (0 missing)
##       Successful Interceptions - home < 2.5   to the right, improve=22.93164, (0 missing)
##       Goals - away                    < 0.5   to the right, improve=22.81888, (0 missing)
##       2                               < 2.225 to the left,  improve=22.46129, (0 missing)
##   Surrogate splits:
##       2                        < 2.15  to the left,  agree=0.684, adj=0.253, (0 split)
##       1                        < 3.05  to the left,  agree=0.672, adj=0.225, (0 split)
##       Dangerous Attacks - away < 6.5   to the left,  agree=0.669, adj=0.218, (0 split)
##       Total Crosses - away     < 2.5   to the left,  agree=0.664, adj=0.207, (0 split)
##       Attacks - away           < 16.5  to the left,  agree=0.661, adj=0.200, (0 split)
## 
## Node number 54: 1152 observations
##   predicted class=2  expected loss=0.4678819  P(node) =0.05247335
##     class counts:   215   613   324
##    probabilities: 0.187 0.532 0.281 
## 
## Node number 55: 675 observations
##   predicted class=X  expected loss=0.3940741  P(node) =0.03074611
##     class counts:    32   234   409
##    probabilities: 0.047 0.347 0.606 
## 
## Node number 86: 481 observations
##   predicted class=1  expected loss=0.4906445  P(node) =0.02190945
##     class counts:   245    98   138
##    probabilities: 0.509 0.204 0.287 
## 
## Node number 87: 505 observations
##   predicted class=X  expected loss=0.4138614  P(node) =0.02300264
##     class counts:   133    76   296
##    probabilities: 0.263 0.150 0.586 
## 
## Node number 88: 191 observations
##   predicted class=1  expected loss=0.4031414  P(node) =0.008700009
##     class counts:   114     9    68
##    probabilities: 0.597 0.047 0.356 
## 
## Node number 89: 1057 observations
##   predicted class=2  expected loss=0.5288553  P(node) =0.04814612
##     class counts:   335   498   224
##    probabilities: 0.317 0.471 0.212 
## 
## Node number 90: 478 observations
##   predicted class=1  expected loss=0.3702929  P(node) =0.0217728
##     class counts:   301    72   105
##    probabilities: 0.630 0.151 0.220 
## 
## Node number 91: 859 observations
##   predicted class=X  expected loss=0.5715949  P(node) =0.03912727
##     class counts:   308   183   368
##    probabilities: 0.359 0.213 0.428 
## 
## Node number 98: 285 observations
##   predicted class=1  expected loss=0.3964912  P(node) =0.01298169
##     class counts:   172    49    64
##    probabilities: 0.604 0.172 0.225 
## 
## Node number 99: 388 observations
##   predicted class=X  expected loss=0.5438144  P(node) =0.01767332
##     class counts:    87   124   177
##    probabilities: 0.224 0.320 0.456
rpart.plot(first_half_result$model, main = "First Half Decision Tree", type = 3, extra = 104)
## Warning: Cannot retrieve the data used to build the model (so cannot determine roundint and is.binary for the variables).
## To silence this warning:
##     Call rpart.plot with roundint=FALSE,
##     or rebuild the rpart model with model=TRUE.

second_half_result <- train_decision_tree(second_half_data, tree_columns, "result")
print("Second Half Accuracy:")
## [1] "Second Half Accuracy:"
print(round(second_half_result$accuracy, 2))
## [1] 0.74
print("Second Half Confusion Matrix:")
## [1] "Second Half Confusion Matrix:"
print(second_half_result$confusion_matrix)
##          Actual
## Predicted    1    2    X
##         1 1830  119  411
##         2   69 1138  279
##         X  250  205  779
cat("Feature Importance (Second Half):\n")
## Feature Importance (Second Half):
importance_df_second_half <- data.frame(
  Feature = names(second_half_result$feature_importances),
  Importance = second_half_result$feature_importances
)
importance_df_second_half <- importance_df_second_half[order(-importance_df_second_half$Importance), ]
print(importance_df_second_half)
##                                                                 Feature
## 1                                                                     1
## 2                                                                     2
## X                                                                     X
## Goals - home                                               Goals - home
## Goals - away                                               Goals - away
## Shots On Target - home                           Shots On Target - home
## Shots On Target - away                           Shots On Target - away
## minute                                                           minute
## Shots Insidebox - away                           Shots Insidebox - away
## Substitutions - away                               Substitutions - away
## Attacks - away                                           Attacks - away
## Successful Passes - away                       Successful Passes - away
## Passes - away                                             Passes - away
## Ball Possession % - home                       Ball Possession % - home
## Counter Attacks - away                           Counter Attacks - away
## Shots Off Target - away                         Shots Off Target - away
## Successful Passes Percentage - away Successful Passes Percentage - away
## Successful Passes Percentage - home Successful Passes Percentage - home
## Total Crosses - home                               Total Crosses - home
## Counter Attacks - home                           Counter Attacks - home
## Throwins - home                                         Throwins - home
## Shots Total - away                                   Shots Total - away
## Saves - away                                               Saves - away
## Offsides - home                                         Offsides - home
## Total Crosses - away                               Total Crosses - away
## Dangerous Attacks - away                       Dangerous Attacks - away
##                                       Importance
## 1                                   4757.1307064
## 2                                   4232.6376530
## X                                   2728.8005056
## Goals - home                        1423.3571831
## Goals - away                        1417.5046962
## Shots On Target - home               522.9331682
## Shots On Target - away               224.5596169
## minute                               167.2934528
## Shots Insidebox - away               135.5681141
## Substitutions - away                 114.5769323
## Attacks - away                        95.7634486
## Successful Passes - away              56.3311583
## Passes - away                         53.3804034
## Ball Possession % - home              37.0335593
## Counter Attacks - away                33.4242646
## Shots Off Target - away               30.5672919
## Successful Passes Percentage - away   27.1198813
## Successful Passes Percentage - home   10.1907121
## Total Crosses - home                   9.8403151
## Counter Attacks - home                 2.8245857
## Throwins - home                        2.7788447
## Shots Total - away                     2.5009602
## Saves - away                           1.8830572
## Offsides - home                        0.5557689
## Total Crosses - away                   0.5557689
## Dangerous Attacks - away               0.2778845
print("Decision Tree Details (Second Half):")
## [1] "Decision Tree Details (Second Half):"
printcp(second_half_result$model)
## 
## Classification tree:
## rpart(formula = y_train ~ ., data = X_train, method = "class", 
##     control = rpart.control(maxdepth = maxdepth, minsplit = minsplit, 
##         cp = cp))
## 
## Variables actually used in tree construction:
## [1] 1                        2                        Counter Attacks - away  
## [4] Goals - away             Shots Off Target - away  Successful Passes - away
## [7] X                       
## 
## Root node error: 11729/20326 = 0.57704
## 
## n= 20326 
## 
##          CP nsplit rel error  xerror      xstd
## 1 0.3181857      0   1.00000 1.00000 0.0060051
## 2 0.2042800      1   0.68181 0.68181 0.0059380
## 3 0.0053287      2   0.47753 0.47753 0.0054309
## 4 0.0049876      6   0.45622 0.47165 0.0054100
## 5 0.0040000      8   0.44624 0.45792 0.0053596
summary(second_half_result$model)
## Call:
## rpart(formula = y_train ~ ., data = X_train, method = "class", 
##     control = rpart.control(maxdepth = maxdepth, minsplit = minsplit, 
##         cp = cp))
##   n= 20326 
## 
##            CP nsplit rel error    xerror        xstd
## 1 0.318185694      0 1.0000000 1.0000000 0.006005055
## 2 0.204279990      1 0.6818143 0.6818143 0.005938005
## 3 0.005328673      2 0.4775343 0.4775343 0.005430921
## 4 0.004987637      6 0.4562196 0.4716515 0.005409996
## 5 0.004000000      8 0.4462444 0.4579248 0.005359618
## 
## Variable importance
##                      1                      2                      X 
##                     30                     26                     17 
##           Goals - home           Goals - away Shots On Target - home 
##                      9                      9                      3 
## Shots On Target - away                 minute Shots Insidebox - away 
##                      1                      1                      1 
##   Substitutions - away         Attacks - away 
##                      1                      1 
## 
## Node number 1: 20326 observations,    complexity param=0.3181857
##   predicted class=1  expected loss=0.5770442  P(node) =1
##     class counts:  8597  5852  5877
##    probabilities: 0.423 0.288 0.289 
##   left son=2 (8165 obs) right son=3 (12161 obs)
##   Primary splits:
##       1            < 2.15  to the left,  improve=3403.4630, (0 missing)
##       2            < 1.925 to the right, improve=2962.0500, (0 missing)
##       Goals - home < 0.5   to the right, improve=1518.9900, (0 missing)
##       X            < 2.685 to the right, improve=1197.2940, (0 missing)
##       Goals - away < 1.5   to the left,  improve= 933.3042, (0 missing)
##   Surrogate splits:
##       2                      < 9.25  to the right, agree=0.896, adj=0.740, (0 split)
##       Goals - home           < 1.5   to the right, agree=0.762, adj=0.408, (0 split)
##       X                      < 3.675 to the right, agree=0.713, adj=0.285, (0 split)
##       Goals - away           < 0.5   to the left,  agree=0.683, adj=0.211, (0 split)
##       Shots On Target - home < 4.5   to the right, agree=0.658, adj=0.149, (0 split)
## 
## Node number 2: 8165 observations
##   predicted class=1  expected loss=0.1740355  P(node) =0.4017023
##     class counts:  6744   267  1154
##    probabilities: 0.826 0.033 0.141 
## 
## Node number 3: 12161 observations,    complexity param=0.20428
##   predicted class=2  expected loss=0.540745  P(node) =0.5982977
##     class counts:  1853  5585  4723
##    probabilities: 0.152 0.459 0.388 
##   left son=6 (5204 obs) right son=7 (6957 obs)
##   Primary splits:
##       2                      < 1.69  to the left,  improve=1585.0610, (0 missing)
##       X                      < 3.325 to the right, improve=1558.8880, (0 missing)
##       1                      < 16    to the right, improve=1202.3820, (0 missing)
##       Goals - away           < 1.5   to the right, improve= 562.4236, (0 missing)
##       Shots Insidebox - away < 3.5   to the right, improve= 144.6099, (0 missing)
##   Surrogate splits:
##       X                      < 3.225 to the right, agree=0.986, adj=0.966, (0 split)
##       1                      < 10.5  to the right, agree=0.899, adj=0.763, (0 split)
##       Goals - away           < 1.5   to the right, agree=0.750, adj=0.415, (0 split)
##       Shots On Target - away < 3.5   to the right, agree=0.626, adj=0.126, (0 split)
##       Shots Insidebox - away < 6.5   to the right, agree=0.608, adj=0.085, (0 split)
## 
## Node number 6: 5204 observations
##   predicted class=2  expected loss=0.2052267  P(node) =0.2560268
##     class counts:   190  4136   878
##    probabilities: 0.037 0.795 0.169 
## 
## Node number 7: 6957 observations,    complexity param=0.005328673
##   predicted class=X  expected loss=0.4473192  P(node) =0.342271
##     class counts:  1663  1449  3845
##    probabilities: 0.239 0.208 0.553 
##   left son=14 (4290 obs) right son=15 (2667 obs)
##   Primary splits:
##       X                    < 1.69  to the right, improve=212.0588, (0 missing)
##       2                    < 4.875 to the left,  improve=179.2189, (0 missing)
##       minute               < 28.5  to the left,  improve=170.7462, (0 missing)
##       Substitutions - away < 1.5   to the left,  improve=160.2496, (0 missing)
##       1                    < 6.25  to the left,  improve=151.9271, (0 missing)
##   Surrogate splits:
##       minute               < 29.5  to the left,  agree=0.919, adj=0.789, (0 split)
##       Substitutions - away < 2.5   to the left,  agree=0.824, adj=0.540, (0 split)
##       2                    < 6.25  to the left,  agree=0.795, adj=0.465, (0 split)
##       1                    < 6.25  to the left,  agree=0.755, adj=0.361, (0 split)
##       Attacks - away       < 72.5  to the left,  agree=0.737, adj=0.314, (0 split)
## 
## Node number 14: 4290 observations,    complexity param=0.005328673
##   predicted class=X  expected loss=0.5596737  P(node) =0.2110597
##     class counts:  1255  1146  1889
##    probabilities: 0.293 0.267 0.440 
##   left son=28 (3022 obs) right son=29 (1268 obs)
##   Primary splits:
##       1                        < 4.165 to the left,  improve=67.97437, (0 missing)
##       Shots Insidebox - home   < 4.5   to the right, improve=51.17675, (0 missing)
##       Successful Passes - away < 179.5 to the left,  improve=48.93978, (0 missing)
##       2                        < 3.325 to the right, improve=46.78760, (0 missing)
##       Dribble Attempts - home  < 8.5   to the right, improve=46.23388, (0 missing)
##   Surrogate splits:
##       2                        < 2.81  to the right, agree=0.839, adj=0.454, (0 split)
##       Passes - away            < 383.5 to the left,  agree=0.775, adj=0.238, (0 split)
##       Successful Passes - away < 321.5 to the left,  agree=0.774, adj=0.235, (0 split)
##       Ball Possession % - home < 37.5  to the right, agree=0.762, adj=0.196, (0 split)
##       Attacks - away           < 77.5  to the left,  agree=0.749, adj=0.151, (0 split)
## 
## Node number 15: 2667 observations
##   predicted class=X  expected loss=0.2665917  P(node) =0.1312113
##     class counts:   408   303  1956
##    probabilities: 0.153 0.114 0.733 
## 
## Node number 28: 3022 observations,    complexity param=0.005328673
##   predicted class=X  expected loss=0.5622105  P(node) =0.1486766
##     class counts:  1062   637  1323
##    probabilities: 0.351 0.211 0.438 
##   left son=56 (1555 obs) right son=57 (1467 obs)
##   Primary splits:
##       Goals - away             < 0.5   to the right, improve=42.83603, (0 missing)
##       Shots Insidebox - home   < 4.5   to the right, improve=39.32185, (0 missing)
##       Counter Attacks - away   < 4.5   to the left,  improve=31.32226, (0 missing)
##       Successful Passes - away < 180.5 to the left,  improve=31.01236, (0 missing)
##       Accurate Crosses - away  < 6.5   to the left,  improve=30.36431, (0 missing)
##   Surrogate splits:
##       Goals - home                        < 0.5   to the right, agree=0.919, adj=0.834, (0 split)
##       Shots On Target - home              < 2.5   to the right, agree=0.666, adj=0.312, (0 split)
##       Shots On Target - away              < 1.5   to the right, agree=0.665, adj=0.311, (0 split)
##       Successful Passes Percentage - home < 80.5  to the right, agree=0.630, adj=0.238, (0 split)
##       Total Crosses - home                < 11.5  to the right, agree=0.626, adj=0.230, (0 split)
## 
## Node number 29: 1268 observations,    complexity param=0.005328673
##   predicted class=X  expected loss=0.5536278  P(node) =0.06238315
##     class counts:   193   509   566
##    probabilities: 0.152 0.401 0.446 
##   left son=58 (561 obs) right son=59 (707 obs)
##   Primary splits:
##       Successful Passes - away < 276.5 to the left,  improve=40.35611, (0 missing)
##       Passes - away            < 334.5 to the left,  improve=39.59947, (0 missing)
##       Shots Insidebox - away   < 4.5   to the left,  improve=39.25581, (0 missing)
##       Tackles - away           < 3.5   to the left,  improve=35.62239, (0 missing)
##       Accurate Crosses - away  < 4.5   to the left,  improve=30.96487, (0 missing)
##   Surrogate splits:
##       Passes - away                       < 320.5 to the left,  agree=0.965, adj=0.922, (0 split)
##       Successful Passes Percentage - away < 82.5  to the left,  agree=0.855, adj=0.672, (0 split)
##       Ball Possession % - home            < 47.5  to the right, agree=0.818, adj=0.588, (0 split)
##       Attacks - away                      < 66.5  to the left,  agree=0.765, adj=0.469, (0 split)
##       X                                   < 2.685 to the right, agree=0.736, adj=0.403, (0 split)
## 
## Node number 56: 1555 observations,    complexity param=0.004987637
##   predicted class=1  expected loss=0.5665595  P(node) =0.076503
##     class counts:   674   327   554
##    probabilities: 0.433 0.210 0.356 
##   left son=112 (1484 obs) right son=113 (71 obs)
##   Primary splits:
##       Counter Attacks - away  < 4.5   to the left,  improve=33.42426, (0 missing)
##       Shots Off Target - away < 0.5   to the right, improve=29.42211, (0 missing)
##       Dribble Attempts - away < 14.5  to the right, improve=28.19578, (0 missing)
##       Dribble Attempts - home < 8.5   to the right, improve=24.64382, (0 missing)
##       Shots On Target - away  < 0.5   to the right, improve=23.84440, (0 missing)
##   Surrogate splits:
##       Shots On Target - away < 0.5   to the right, agree=0.969, adj=0.324, (0 split)
##       Counter Attacks - home < 4.5   to the left,  agree=0.958, adj=0.085, (0 split)
##       Saves - away           < 7.5   to the left,  agree=0.957, adj=0.056, (0 split)
##       Shots On Target - home < 9.5   to the left,  agree=0.957, adj=0.056, (0 split)
##       Shots Insidebox - away < 10.5  to the left,  agree=0.956, adj=0.028, (0 split)
## 
## Node number 57: 1467 observations
##   predicted class=X  expected loss=0.475801  P(node) =0.07217357
##     class counts:   388   310   769
##    probabilities: 0.264 0.211 0.524 
## 
## Node number 58: 561 observations
##   predicted class=2  expected loss=0.4777184  P(node) =0.02760012
##     class counts:   105   293   163
##    probabilities: 0.187 0.522 0.291 
## 
## Node number 59: 707 observations
##   predicted class=X  expected loss=0.4299859  P(node) =0.03478304
##     class counts:    88   216   403
##    probabilities: 0.124 0.306 0.570 
## 
## Node number 112: 1484 observations,    complexity param=0.004987637
##   predicted class=1  expected loss=0.5491914  P(node) =0.07300994
##     class counts:   669   274   541
##    probabilities: 0.451 0.185 0.365 
##   left son=224 (1374 obs) right son=225 (110 obs)
##   Primary splits:
##       Shots Off Target - away < 0.5   to the right, improve=30.56729, (0 missing)
##       Dribble Attempts - away < 16.5  to the right, improve=26.76614, (0 missing)
##       Free Kicks - home       < 6.5   to the right, improve=25.72035, (0 missing)
##       Dribble Attempts - home < 8.5   to the right, improve=23.98845, (0 missing)
##       Shots Outsidebox - away < 6.5   to the right, improve=22.47260, (0 missing)
##   Surrogate splits:
##       Throwins - home          < 24.5  to the left,  agree=0.933, adj=0.091, (0 split)
##       Shots Total - away       < 1.5   to the right, agree=0.932, adj=0.082, (0 split)
##       Offsides - home          < 4.5   to the left,  agree=0.927, adj=0.018, (0 split)
##       Total Crosses - away     < 1.5   to the right, agree=0.927, adj=0.018, (0 split)
##       Dangerous Attacks - away < 5.5   to the right, agree=0.927, adj=0.009, (0 split)
## 
## Node number 113: 71 observations
##   predicted class=2  expected loss=0.2535211  P(node) =0.003493063
##     class counts:     5    53    13
##    probabilities: 0.070 0.746 0.183 
## 
## Node number 224: 1374 observations
##   predicted class=1  expected loss=0.5189229  P(node) =0.06759815
##     class counts:   661   249   464
##    probabilities: 0.481 0.181 0.338 
## 
## Node number 225: 110 observations
##   predicted class=X  expected loss=0.3  P(node) =0.005411788
##     class counts:     8    25    77
##    probabilities: 0.073 0.227 0.700
rpart.plot(second_half_result$model, main = "Second Half Decision Tree", type = 3, extra = 104)
## Warning: Cannot retrieve the data used to build the model (so cannot determine roundint and is.binary for the variables).
## To silence this warning:
##     Call rpart.plot with roundint=FALSE,
##     or rebuild the rpart model with model=TRUE.

print("First Half Accuracy:")
## [1] "First Half Accuracy:"
print(round(first_half_result$accuracy, 2))
## [1] 0.62
print("Second Half Accuracy:")
## [1] "Second Half Accuracy:"
print(round(second_half_result$accuracy, 2))
## [1] 0.74

As we can see from the results, 1,2 and X are the most importans features for the match result prediction. We eliminated some features w/ high correlation during our analysis. Also eliminating extraordinary events helped us to have higher accuracy levels. Additionally, our second half accuracy is reasonable higher (0.74) than our first half accuracy (0.62). Possible reasons for the higher accuracy in the second half than the first half could be that the second half is generally more dynamic and predictable, team strategies become more apparent, and the tempo of the match increases. Additionally, irregularities in the dataset and some features becoming more significant in the second half may have helped the model make more accurate predictions. Changes in teams’ playing style and strategies may also affect this difference.